Fecha de creación: 15/09/2022
Grupo: 6
Autores:
Práctico entregable: Utilizar la base de jugadores “players_22.csv” disponible en la página de Kaggle https://www.kaggle.com/datasets/stefanoleone992/fifa-22-complete-player-dataset . Considerar que la base 2022 no tiene el mismo formato que la base vista en clase, a los nombres de las variables se les agregó una keyword para identificar a qué tipo de habilidad corresponde.
Con la nueva base, realizar un análisis análogo al que realizamos en el cursado de la materia con los datos FIFA2019. Realice comentarios en cada parte (verbose=True ;))
Empezamos cargando algunas herramientas para cargar los datos y manipularlos.
import os
import numpy as np
import pandas as pd
pd.set_option('display.max_columns',100)
pd.set_option('display.max_rows',1000)
import itertools
import warnings
# NOTE(review): the "ignore" action with no category silences every warning for
# the whole session, deprecation warnings included; consider narrowing the filter.
warnings.filterwarnings("ignore")
import io
Para visualización usaremos principalmente plotly, también seaborn y matplotlib.
from plotly.offline import init_notebook_mode, plot,iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
import matplotlib.pyplot as plt
import plotly.tools as tls#visualization
import plotly.figure_factory as ff#visualization
import seaborn as sns
from sklearn.manifold import TSNE
import bokeh.plotting as bp
from bokeh.models import HoverTool, BoxSelectTool
from bokeh.plotting import figure, show, output_notebook
Librerías de métodos de clustering.
from sklearn.cluster import KMeans, MeanShift, DBSCAN
from sklearn.preprocessing import normalize, StandardScaler
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
Librerías para la definición de métricas.
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
# Load the FIFA 22 player table. The CSV is expected under
# <parent of the current working directory>/data/raw.
ROOT_PATH = os.path.dirname(os.getcwd())
DATA_PATH = os.path.join(ROOT_PATH, 'data', 'raw')
players_csv = os.path.join(DATA_PATH, 'players_22.csv')
df = pd.read_csv(players_csv)
# Each column is one variable, so the column count is the number of variables.
n_variables = df.shape[1]
print('Cantidad de variables para analizar', n_variables)
Cantidad de variables para analizar 110
# Show every column name of the data set as a plain Python list
list(df.columns)
['sofifa_id', 'player_url', 'short_name', 'long_name', 'player_positions', 'overall', 'potential', 'value_eur', 'wage_eur', 'age', 'dob', 'height_cm', 'weight_kg', 'club_team_id', 'club_name', 'league_name', 'league_level', 'club_position', 'club_jersey_number', 'club_loaned_from', 'club_joined', 'club_contract_valid_until', 'nationality_id', 'nationality_name', 'nation_team_id', 'nation_position', 'nation_jersey_number', 'preferred_foot', 'weak_foot', 'skill_moves', 'international_reputation', 'work_rate', 'body_type', 'real_face', 'release_clause_eur', 'player_tags', 'player_traits', 'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions', 'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking_awareness', 'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes', 'goalkeeping_speed', 'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb', 'gk', 'player_face_url', 'club_logo_url', 'club_flag_url', 'nation_logo_url', 'nation_flag_url']
# Count the missing values in each column
df.isnull().sum()
sofifa_id 0 player_url 0 short_name 0 long_name 0 player_positions 0 overall 0 potential 0 value_eur 74 wage_eur 61 age 0 dob 0 height_cm 0 weight_kg 0 club_team_id 61 club_name 61 league_name 61 league_level 61 club_position 61 club_jersey_number 61 club_loaned_from 18137 club_joined 1163 club_contract_valid_until 61 nationality_id 0 nationality_name 0 nation_team_id 18480 nation_position 18480 nation_jersey_number 18480 preferred_foot 0 weak_foot 0 skill_moves 0 international_reputation 0 work_rate 0 body_type 0 real_face 0 release_clause_eur 1176 player_tags 17798 player_traits 9841 pace 2132 shooting 2132 passing 2132 dribbling 2132 defending 2132 physic 2132 attacking_crossing 0 attacking_finishing 0 attacking_heading_accuracy 0 attacking_short_passing 0 attacking_volleys 0 skill_dribbling 0 skill_curve 0 skill_fk_accuracy 0 skill_long_passing 0 skill_ball_control 0 movement_acceleration 0 movement_sprint_speed 0 movement_agility 0 movement_reactions 0 movement_balance 0 power_shot_power 0 power_jumping 0 power_stamina 0 power_strength 0 power_long_shots 0 mentality_aggression 0 mentality_interceptions 0 mentality_positioning 0 mentality_vision 0 mentality_penalties 0 mentality_composure 0 defending_marking_awareness 0 defending_standing_tackle 0 defending_sliding_tackle 0 goalkeeping_diving 0 goalkeeping_handling 0 goalkeeping_kicking 0 goalkeeping_positioning 0 goalkeeping_reflexes 0 goalkeeping_speed 17107 ls 0 st 0 rs 0 lw 0 lf 0 cf 0 rf 0 rw 0 lam 0 cam 0 ram 0 lm 0 lcm 0 cm 0 rcm 0 rm 0 lwb 0 ldm 0 cdm 0 rdm 0 rwb 0 lb 0 lcb 0 cb 0 rcb 0 rb 0 gk 0 player_face_url 0 club_logo_url 61 club_flag_url 61 nation_logo_url 18480 nation_flag_url 0 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) of all numeric variables
numeric_summary = df.describe()
print(numeric_summary)
sofifa_id overall potential value_eur wage_eur \
count 19239.000000 19239.000000 19239.000000 1.916500e+04 19178.000000
mean 231468.086959 65.772182 71.079370 2.850452e+06 9017.989363
std 27039.717497 6.880232 6.086213 7.613700e+06 19470.176724
min 41.000000 47.000000 49.000000 9.000000e+03 500.000000
25% 214413.500000 61.000000 67.000000 4.750000e+05 1000.000000
50% 236543.000000 66.000000 71.000000 9.750000e+05 3000.000000
75% 253532.500000 70.000000 75.000000 2.000000e+06 8000.000000
max 264640.000000 93.000000 95.000000 1.940000e+08 350000.000000
age height_cm weight_kg club_team_id league_level \
count 19239.000000 19239.000000 19239.000000 19178.000000 19178.000000
mean 25.210822 181.299704 74.943032 50580.498123 1.354364
std 4.748235 6.863179 7.069434 54401.868535 0.747865
min 16.000000 155.000000 49.000000 1.000000 1.000000
25% 21.000000 176.000000 70.000000 479.000000 1.000000
50% 25.000000 181.000000 75.000000 1938.000000 1.000000
75% 29.000000 186.000000 80.000000 111139.000000 1.000000
max 54.000000 206.000000 110.000000 115820.000000 5.000000
club_jersey_number club_contract_valid_until nationality_id \
count 19178.000000 19178.000000 19239.000000
mean 20.945250 2022.764000 58.602682
std 17.909369 1.213203 50.298614
min 1.000000 2021.000000 1.000000
25% 9.000000 2022.000000 21.000000
50% 18.000000 2022.000000 45.000000
75% 27.000000 2024.000000 60.000000
max 99.000000 2031.000000 219.000000
nation_team_id nation_jersey_number weak_foot skill_moves \
count 759.000000 759.000000 19239.000000 19239.000000
mean 14480.848485 12.567852 2.946151 2.352461
std 35328.730217 7.039116 0.671560 0.767659
min 1318.000000 1.000000 1.000000 1.000000
25% 1338.000000 7.000000 3.000000 2.000000
50% 1357.000000 12.000000 3.000000 2.000000
75% 1386.000000 19.000000 3.000000 3.000000
max 111473.000000 28.000000 5.000000 5.000000
international_reputation release_clause_eur pace \
count 19239.000000 1.806300e+04 17107.000000
mean 1.094184 5.374044e+06 68.213071
std 0.371098 1.494837e+07 10.933155
min 1.000000 1.600000e+04 28.000000
25% 1.000000 8.060000e+05 62.000000
50% 1.000000 1.600000e+06 69.000000
75% 1.000000 3.700000e+06 76.000000
max 5.000000 3.735000e+08 97.000000
shooting passing dribbling defending physic \
count 17107.000000 17107.000000 17107.000000 17107.000000 17107.000000
mean 52.345297 57.312562 62.561174 51.703630 64.823289
std 14.051623 10.068965 9.651312 16.189746 9.791886
min 18.000000 25.000000 27.000000 14.000000 29.000000
25% 42.000000 51.000000 57.000000 37.000000 59.000000
50% 54.000000 58.000000 64.000000 56.000000 66.000000
75% 63.000000 64.000000 69.000000 64.000000 72.000000
max 94.000000 93.000000 95.000000 91.000000 90.000000
attacking_crossing attacking_finishing attacking_heading_accuracy \
count 19239.000000 19239.000000 19239.000000
mean 49.577421 45.894433 51.783877
std 18.034661 19.721023 17.294183
min 6.000000 2.000000 5.000000
25% 38.000000 30.000000 44.000000
50% 54.000000 50.000000 55.000000
75% 63.000000 62.000000 64.000000
max 94.000000 95.000000 93.000000
attacking_short_passing attacking_volleys skill_dribbling \
count 19239.000000 19239.000000 19239.000000
mean 58.867977 42.463849 55.660429
std 14.490858 17.653329 18.784590
min 7.000000 3.000000 4.000000
25% 54.000000 30.000000 50.000000
50% 62.000000 43.000000 61.000000
75% 68.000000 56.000000 68.000000
max 94.000000 90.000000 96.000000
skill_curve skill_fk_accuracy skill_long_passing \
count 19239.000000 19239.000000 19239.000000
mean 47.268933 42.249025 53.072249
std 18.181085 17.178590 15.026569
min 6.000000 4.000000 9.000000
25% 35.000000 31.000000 44.000000
50% 49.000000 41.000000 56.000000
75% 61.000000 55.000000 64.000000
max 94.000000 94.000000 93.000000
skill_ball_control movement_acceleration movement_sprint_speed \
count 19239.000000 19239.000000 19239.000000
mean 58.472010 64.652893 64.714902
std 16.663722 15.167399 14.965426
min 8.000000 14.000000 15.000000
25% 55.000000 57.000000 58.000000
50% 63.000000 67.000000 68.000000
75% 69.000000 75.000000 75.000000
max 96.000000 97.000000 97.000000
movement_agility movement_reactions movement_balance \
count 19239.000000 19239.000000 19239.000000
mean 63.500078 61.450023 64.068611
std 14.862285 9.042281 14.324789
min 18.000000 25.000000 15.000000
25% 55.000000 56.000000 56.000000
50% 66.000000 62.000000 66.000000
75% 74.000000 67.000000 74.000000
max 96.000000 94.000000 96.000000
power_shot_power power_jumping power_stamina power_strength \
count 19239.000000 19239.000000 19239.000000 19239.000000
mean 57.776860 64.813504 63.084880 65.007745
std 13.192224 12.122977 16.145279 12.663518
min 20.000000 22.000000 12.000000 19.000000
25% 48.000000 57.000000 56.000000 57.000000
50% 59.000000 65.000000 66.000000 66.000000
75% 68.000000 73.000000 74.000000 74.000000
max 95.000000 95.000000 97.000000 97.000000
power_long_shots mentality_aggression mentality_interceptions \
count 19239.000000 19239.000000 19239.000000
mean 46.642705 55.538957 46.613545
std 19.411583 16.972181 20.677077
min 4.000000 10.000000 3.000000
25% 32.000000 44.000000 26.000000
50% 51.000000 58.000000 53.000000
75% 62.000000 68.000000 64.000000
max 94.000000 95.000000 91.000000
mentality_positioning mentality_vision mentality_penalties \
count 19239.000000 19239.000000 19239.000000
mean 50.330215 53.964603 47.858724
std 19.621601 13.650481 15.768583
min 2.000000 10.000000 7.000000
25% 40.000000 45.000000 38.000000
50% 56.000000 55.000000 49.000000
75% 64.000000 64.000000 60.000000
max 96.000000 95.000000 93.000000
mentality_composure defending_marking_awareness \
count 19239.000000 19239.000000
mean 57.929830 46.601746
std 12.159326 20.200807
min 12.000000 4.000000
25% 50.000000 29.000000
50% 59.000000 52.000000
75% 66.000000 63.000000
max 96.000000 93.000000
defending_standing_tackle defending_sliding_tackle \
count 19239.000000 19239.000000
mean 48.045584 45.906700
std 21.232718 20.755683
min 5.000000 5.000000
25% 28.000000 25.000000
50% 56.000000 53.000000
75% 65.000000 63.000000
max 93.000000 92.000000
goalkeeping_diving goalkeeping_handling goalkeeping_kicking \
count 19239.000000 19239.000000 19239.000000
mean 16.406102 16.192474 16.055356
std 17.574028 16.839528 16.564554
min 2.000000 2.000000 2.000000
25% 8.000000 8.000000 8.000000
50% 11.000000 11.000000 11.000000
75% 14.000000 14.000000 14.000000
max 91.000000 92.000000 93.000000
goalkeeping_positioning goalkeeping_reflexes goalkeeping_speed
count 19239.000000 19239.000000 2132.000000
mean 16.229274 16.491814 36.439962
std 17.059779 17.884833 10.751563
min 2.000000 2.000000 15.000000
25% 8.000000 8.000000 27.000000
50% 11.000000 11.000000 36.000000
75% 14.000000 14.000000 45.000000
max 92.000000 90.000000 65.000000
# Best-rated player for every distinct value of the raw `player_positions` field.
# idxmax() returns index *labels*, so the rows are picked with label-based .loc;
# positional .iloc only returned the same rows because df carries the default
# 0..n-1 index.
best_idx = df.groupby('player_positions')['overall'].idxmax()
best_players_per_position = df.loc[best_idx, ['player_positions', 'short_name', 'overall']]
best_players_per_position.head()
| player_positions | short_name | overall | |
|---|---|---|---|
| 28 | CAM | Bruno Fernandes | 88 |
| 2052 | CAM, CDM | P. Kasami | 74 |
| 2711 | CAM, CDM, CM | A. Ricaurte | 73 |
| 1589 | CAM, CDM, LM | Álex Fernández | 75 |
| 8515 | CAM, CDM, RM | M. Miranda | 67 |
# Print the row count, columns and dtypes of the per-position table
best_players_per_position.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 674 entries, 28 to 3959 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 player_positions 674 non-null object 1 short_name 674 non-null object 2 overall 674 non-null int64 dtypes: int64(1), object(2) memory usage: 21.1+ KB
Como podemos ver, el campo de posiciones de cada jugador puede contener hasta 3 valores, por lo que al analizar los mejores jugadores por posición se genera un data frame de 674 registros (uno por cada combinación distinta de posiciones).
Por lo tanto, optamos por tomar como posición principal la primera que figura en este campo; el resto se descartó.
# `player_positions` may list several comma-separated positions; keep only the
# first one as the player's main position. The vectorised .str accessor leaves
# missing values as missing, so the column no longer has to be cast to str
# (which would turn NaN into the literal text 'nan') and no sentinel check is needed.
df['player_positions_principal'] = df['player_positions'].str.split(',').str[0]
# Number of players per main position, ordered alphabetically by position
pd.DataFrame(df.player_positions_principal.value_counts().sort_index())
| player_positions_principal | |
|---|---|
| CAM | 1151 |
| CB | 3339 |
| CDM | 1665 |
| CF | 142 |
| CM | 2173 |
| GK | 2132 |
| LB | 1360 |
| LM | 1016 |
| LW | 435 |
| LWB | 171 |
| RB | 1346 |
| RM | 1028 |
| RW | 495 |
| RWB | 178 |
| ST | 2608 |
# Repeat the previous analysis using the main position: for every value of
# `player_positions_principal`, pick the player with the highest overall.
# idxmax() yields index labels, hence label-based .loc rather than positional
# .iloc (the two only coincide while df keeps its default 0..n-1 index).
top_idx = df.groupby('player_positions_principal')['overall'].idxmax()
best_players_per_position = df.loc[
    top_idx,
    ['player_positions', 'player_positions_principal', 'short_name', 'overall'],
]
best_players_per_position
| player_positions | player_positions_principal | short_name | overall | |
|---|---|---|---|---|
| 28 | CAM | CAM | Bruno Fernandes | 88 |
| 15 | CB | CB | V. van Dijk | 89 |
| 10 | CDM, CM | CDM | N. Kanté | 90 |
| 11 | CF, ST | CF | K. Benzema | 89 |
| 4 | CM, CAM | CM | K. De Bruyne | 91 |
| 5 | GK | GK | J. Oblak | 91 |
| 42 | LB | LB | A. Robertson | 87 |
| 13 | LM, CF, LW | LM | H. Son | 89 |
| 3 | LW, CAM | LW | Neymar Jr | 91 |
| 165 | LWB, LB, LM | LWB | L. Spinazzola | 83 |
| 44 | RB | RB | T. Alexander-Arnold | 87 |
| 45 | RM, CF, LM | RM | J. Sancho | 87 |
| 0 | RW, ST, CF | RW | L. Messi | 93 |
| 351 | RWB, RB, RM | RWB | H. Hateboer | 81 |
| 1 | ST | ST | R. Lewandowski | 92 |
# Clubs ranked by the mean overall rating of their players, highest first
mean_overall_by_club = df.groupby("club_name")["overall"].mean()
club_avg_overall = mean_overall_by_club.reset_index().sort_values(by="overall", ascending=False)
club_avg_overall.head(10)
| club_name | overall | |
|---|---|---|
| 368 | Juventus | 79.961538 |
| 470 | Paris Saint-Germain | 79.454545 |
| 358 | Inter | 78.857143 |
| 241 | FC Bayern München | 78.071429 |
| 412 | Manchester United | 78.000000 |
| 509 | Real Madrid CF | 77.757576 |
| 434 | Napoli | 77.500000 |
| 63 | Atalanta | 77.480000 |
| 512 | Real Sociedad | 77.280000 |
| 546 | SL Benfica | 76.931034 |
La variable 'Overall' toma valores enteros entre 0 y 100, se calcula usando otras variables de desempeño del jugador (skills_ratings), utilizando redondeo. Por lo tanto la variable 'Overall' figura como numérica discreta, pero como se redondea es de "naturaleza" continua.
# Histogram of "overall" using as many bins as there are integer values
# between its observed minimum and maximum
overall_min, overall_max = df['overall'].min(), df['overall'].max()
nbins = overall_max - overall_min + 1
df['overall'].hist(bins=nbins)
print(nbins)
47
Las demás variables que definen el desempeño (overall) del jugador según la habilidad que cada una representa ('attacking_crossing', 'attacking_finishing', 'skill_dribbling', 'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_agility', 'power_shot_power', 'power_jumping', etc.) toman valores enteros entre 0 y 100, también redondeados, y en conjunto permiten obtener el desempeño total del jugador (overall).
# Histogram of "attacking_crossing" using as many bins as there are integer
# values between its observed minimum and maximum
crossing_min, crossing_max = df['attacking_crossing'].min(), df['attacking_crossing'].max()
nbins = crossing_max - crossing_min + 1
df['attacking_crossing'].hist(bins=nbins)
print(nbins)
89
# Histogram of "skill_ball_control" using as many bins as there are integer
# values between its observed minimum and maximum
control_min, control_max = df['skill_ball_control'].min(), df['skill_ball_control'].max()
nbins = control_max - control_min + 1
df['skill_ball_control'].hist(bins=nbins)
print(nbins)
89
# Histogram of "power_shot_power" using as many bins as there are integer
# values between its observed minimum and maximum
shot_min, shot_max = df['power_shot_power'].min(), df['power_shot_power'].max()
nbins = shot_max - shot_min + 1
df['power_shot_power'].hist(bins=nbins)
print(nbins)
76
# Number of players to analyse: the first n rows of df
n = 10000
# head(n) returns exactly n rows; the label slice df.loc[:n] is end-inclusive
# and returned n + 1 rows.
df_fifa22 = df.head(n)
# Keep only the players whose overall rating is strictly greater than 70.
# The explicit copy() makes df_fifa22 an independent frame rather than a slice
# of df (the earlier df.copy() was overwritten before being used).
df_fifa22 = df_fifa22[df_fifa22['overall'] > 70].copy()
df_fifa22
| sofifa_id | player_url | short_name | long_name | player_positions | overall | potential | value_eur | wage_eur | age | dob | height_cm | weight_kg | club_team_id | club_name | league_name | league_level | club_position | club_jersey_number | club_loaned_from | club_joined | club_contract_valid_until | nationality_id | nationality_name | nation_team_id | nation_position | nation_jersey_number | preferred_foot | weak_foot | skill_moves | international_reputation | work_rate | body_type | real_face | release_clause_eur | player_tags | player_traits | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | ... | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | goalkeeping_speed | ls | st | rs | lw | lf | cf | rf | rw | lam | cam | ram | lm | lcm | cm | rcm | rm | lwb | ldm | cdm | rdm | rwb | lb | lcb | cb | rcb | rb | gk | player_face_url | club_logo_url | club_flag_url | nation_logo_url | nation_flag_url | player_positions_principal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | https://sofifa.com/player/158023/lionel-messi/... | L. Messi | Lionel Andrés Messi Cuccittini | RW, ST, CF | 93 | 93 | 78000000.0 | 320000.0 | 34 | 1987-06-24 | 170 | 72 | 73.0 | Paris Saint-Germain | French Ligue 1 | 1.0 | RW | 30.0 | NaN | 2021-08-10 | 2023.0 | 52 | Argentina | 1369.0 | RW | 10.0 | Left | 4 | 4 | 5 | Medium/Low | Unique | Yes | 144300000.0 | #Dribbler, #Distance Shooter, #FK Specialist, ... | Finesse Shot, Long Shot Taker (AI), Playmaker ... | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | ... | 69 | 94 | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | NaN | 89+3 | 89+3 | 89+3 | 92 | 93 | 93 | 93 | 92 | 93 | 93 | 93 | 91+2 | 87+3 | 87+3 | 87+3 | 91+2 | 66+3 | 64+3 | 64+3 | 64+3 | 66+3 | 61+3 | 50+3 | 50+3 | 50+3 | 61+3 | 19+3 | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/flags/ar.png | RW |
| 1 | 188545 | https://sofifa.com/player/188545/robert-lewand... | R. Lewandowski | Robert Lewandowski | ST | 92 | 92 | 119500000.0 | 270000.0 | 32 | 1988-08-21 | 185 | 81 | 21.0 | FC Bayern München | German 1. Bundesliga | 1.0 | ST | 9.0 | NaN | 2014-07-01 | 2023.0 | 37 | Poland | 1353.0 | RS | 9.0 | Right | 4 | 4 | 5 | High/Medium | Unique | Yes | 197200000.0 | #Aerial Threat, #Distance Shooter, #Clinical F... | Solid Player, Finesse Shot, Outside Foot Shot,... | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | ... | 86 | 87 | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | NaN | 90+2 | 90+2 | 90+2 | 85 | 88 | 88 | 88 | 85 | 86+3 | 86+3 | 86+3 | 84+3 | 80+3 | 80+3 | 80+3 | 84+3 | 64+3 | 66+3 | 66+3 | 66+3 | 64+3 | 61+3 | 60+3 | 60+3 | 60+3 | 61+3 | 19+3 | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/flags/pl.png | ST |
| 2 | 20801 | https://sofifa.com/player/20801/c-ronaldo-dos-... | Cristiano Ronaldo | Cristiano Ronaldo dos Santos Aveiro | ST, LW | 91 | 91 | 45000000.0 | 270000.0 | 36 | 1985-02-05 | 187 | 83 | 11.0 | Manchester United | English Premier League | 1.0 | ST | 7.0 | NaN | 2021-08-27 | 2023.0 | 38 | Portugal | 1354.0 | ST | 7.0 | Right | 4 | 5 | 5 | High/Low | Unique | Yes | 83300000.0 | #Aerial Threat, #Dribbler, #Distance Shooter, ... | Power Free-Kick, Flair, Long Shot Taker (AI), ... | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | ... | 77 | 93 | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | NaN | 90+1 | 90+1 | 90+1 | 88 | 89 | 89 | 89 | 88 | 86+3 | 86+3 | 86+3 | 86+3 | 78+3 | 78+3 | 78+3 | 86+3 | 63+3 | 59+3 | 59+3 | 59+3 | 63+3 | 60+3 | 53+3 | 53+3 | 53+3 | 60+3 | 20+3 | https://cdn.sofifa.net/players/020/801/22_120.png | https://cdn.sofifa.net/teams/11/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/teams/1354/60.png | https://cdn.sofifa.net/flags/pt.png | ST |
| 3 | 190871 | https://sofifa.com/player/190871/neymar-da-sil... | Neymar Jr | Neymar da Silva Santos Júnior | LW, CAM | 91 | 91 | 129000000.0 | 270000.0 | 29 | 1992-02-05 | 175 | 68 | 73.0 | Paris Saint-Germain | French Ligue 1 | 1.0 | LW | 10.0 | NaN | 2017-08-03 | 2025.0 | 54 | Brazil | NaN | NaN | NaN | Right | 5 | 5 | 5 | High/Medium | Unique | Yes | 238700000.0 | #Speedster, #Dribbler, #Playmaker, #FK Special... | Injury Prone, Flair, Speed Dribbler (AI), Play... | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | ... | 53 | 81 | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | NaN | 83+3 | 83+3 | 83+3 | 90 | 88 | 88 | 88 | 90 | 89+2 | 89+2 | 89+2 | 89+2 | 82+3 | 82+3 | 82+3 | 89+2 | 67+3 | 63+3 | 63+3 | 63+3 | 67+3 | 62+3 | 50+3 | 50+3 | 50+3 | 62+3 | 20+3 | https://cdn.sofifa.net/players/190/871/22_120.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/flags/fr.png | NaN | https://cdn.sofifa.net/flags/br.png | LW |
| 4 | 192985 | https://sofifa.com/player/192985/kevin-de-bruy... | K. De Bruyne | Kevin De Bruyne | CM, CAM | 91 | 91 | 125500000.0 | 350000.0 | 30 | 1991-06-28 | 181 | 70 | 10.0 | Manchester City | English Premier League | 1.0 | RCM | 17.0 | NaN | 2015-08-30 | 2025.0 | 7 | Belgium | 1325.0 | RCM | 7.0 | Right | 5 | 4 | 4 | High/High | Unique | Yes | 232200000.0 | #Dribbler, #Playmaker, #Engine, #Distance Shoo... | Injury Prone, Leadership, Early Crosser, Long ... | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | ... | 74 | 91 | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | NaN | 83+3 | 83+3 | 83+3 | 88 | 87 | 87 | 87 | 88 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 79+3 | 80+3 | 80+3 | 80+3 | 79+3 | 75+3 | 69+3 | 69+3 | 69+3 | 75+3 | 21+3 | https://cdn.sofifa.net/players/192/985/22_120.png | https://cdn.sofifa.net/teams/10/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/teams/1325/60.png | https://cdn.sofifa.net/flags/be.png | CM |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4588 | 261987 | https://sofifa.com/player/261987/antonio-blanc... | Blanco | Antonio Blanco Conde | CM, CDM | 71 | 83 | 4500000.0 | 51000.0 | 20 | 2000-07-23 | 176 | 68 | 243.0 | Real Madrid CF | Spain Primera Division | 1.0 | RES | 27.0 | NaN | 2020-07-01 | 2022.0 | 45 | Spain | NaN | NaN | NaN | Right | 3 | 4 | 1 | High/Medium | Normal (170-185) | No | 10100000.0 | NaN | NaN | 61.0 | 56.0 | 67.0 | 66.0 | 62.0 | 58.0 | 46 | 55 | 49 | 75 | 37 | 65 | 56 | ... | 53 | 64 | 58 | 69 | 67 | 75 | 60 | 73 | 60 | 70 | 46 | 11 | 7 | 13 | 12 | 9 | NaN | 60+2 | 60+2 | 60+2 | 64 | 65 | 65 | 65 | 64 | 68+2 | 68+2 | 68+2 | 65+2 | 71+2 | 71+2 | 71+2 | 65+2 | 63+2 | 67+2 | 67+2 | 67+2 | 63+2 | 62+2 | 61+2 | 61+2 | 61+2 | 62+2 | 16+2 | https://cdn.sofifa.net/players/261/987/22_120.png | https://cdn.sofifa.net/teams/243/60.png | https://cdn.sofifa.net/flags/es.png | NaN | https://cdn.sofifa.net/flags/es.png | CM |
| 4589 | 262411 | https://sofifa.com/player/262411/miguel-crespo... | Miguel Crespo | Miguel Crespo da Silva | CM | 71 | 76 | 2800000.0 | 24000.0 | 24 | 1996-09-11 | 183 | 78 | 326.0 | Fenerbahçe SK | Turkish Süper Lig | 1.0 | SUB | 27.0 | NaN | 2021-09-06 | 2024.0 | 38 | Portugal | NaN | NaN | NaN | Right | 3 | 2 | 1 | High/High | Normal (170-185) | No | 5500000.0 | NaN | Playmaker (AI) | 67.0 | 64.0 | 70.0 | 72.0 | 68.0 | 79.0 | 66 | 60 | 62 | 73 | 58 | 73 | 73 | ... | 77 | 65 | 83 | 70 | 68 | 70 | 66 | 74 | 61 | 74 | 69 | 11 | 8 | 8 | 15 | 8 | NaN | 67+2 | 67+2 | 67+2 | 69 | 69 | 69 | 69 | 69 | 70+2 | 70+2 | 70+2 | 70+2 | 71+2 | 71+2 | 71+2 | 70+2 | 70+2 | 72+2 | 72+2 | 72+2 | 70+2 | 70+2 | 70+2 | 70+2 | 70+2 | 70+2 | 17+2 | https://cdn.sofifa.net/players/262/411/22_120.png | https://cdn.sofifa.net/teams/326/60.png | https://cdn.sofifa.net/flags/tr.png | NaN | https://cdn.sofifa.net/flags/pt.png | CM |
| 4590 | 262815 | https://sofifa.com/player/262815/giannis-kotsi... | G. Kotsiras | Giannis Kotsiras | RB, RM | 71 | 71 | 1700000.0 | 600.0 | 28 | 1992-12-16 | 183 | 77 | 1884.0 | Panathinaikos FC | Greek Super League | 1.0 | RB | 33.0 | NaN | 2021-06-22 | 2024.0 | 22 | Greece | NaN | NaN | NaN | Right | 4 | 2 | 1 | High/Medium | Normal (170-185) | No | 3700000.0 | NaN | NaN | 81.0 | 51.0 | 66.0 | 70.0 | 66.0 | 67.0 | 70 | 61 | 49 | 71 | 29 | 71 | 74 | ... | 70 | 40 | 50 | 64 | 65 | 59 | 40 | 70 | 68 | 70 | 67 | 12 | 12 | 11 | 15 | 9 | NaN | 62+2 | 62+2 | 62+2 | 68 | 65 | 65 | 65 | 68 | 66+2 | 66+2 | 66+2 | 69+2 | 66+2 | 66+2 | 66+2 | 69+2 | 70+1 | 67+2 | 67+2 | 67+2 | 70+1 | 69+2 | 65+2 | 65+2 | 65+2 | 69+2 | 17+2 | https://cdn.sofifa.net/players/262/815/22_120.png | https://cdn.sofifa.net/teams/1884/60.png | https://cdn.sofifa.net/flags/gr.png | NaN | https://cdn.sofifa.net/flags/gr.png | RB |
| 4591 | 263230 | https://sofifa.com/player/263230/milutin-osmaj... | M. Osmajić | Milutin Osmajić | ST, LM, RM | 71 | 81 | 4200000.0 | 15000.0 | 21 | 1999-07-25 | 185 | 76 | 1968.0 | Cádiz CF | Spain Primera Division | 1.0 | SUB | 29.0 | NaN | 2021-07-10 | 2024.0 | 15 | Montenegro | NaN | NaN | NaN | Right | 5 | 3 | 1 | Medium/Medium | Stocky (185+) | No | 10100000.0 | NaN | NaN | 76.0 | 70.0 | 60.0 | 65.0 | 39.0 | 71.0 | 64 | 74 | 72 | 63 | 58 | 66 | 56 | ... | 74 | 63 | 70 | 30 | 73 | 59 | 63 | 59 | 35 | 38 | 34 | 14 | 15 | 14 | 12 | 8 | NaN | 71+2 | 71+2 | 71+2 | 68 | 69 | 69 | 69 | 68 | 66+2 | 66+2 | 66+2 | 67+2 | 61+2 | 61+2 | 61+2 | 67+2 | 54+2 | 52+2 | 52+2 | 52+2 | 54+2 | 53+2 | 51+2 | 51+2 | 51+2 | 53+2 | 19+2 | https://cdn.sofifa.net/players/263/230/22_120.png | https://cdn.sofifa.net/teams/1968/60.png | https://cdn.sofifa.net/flags/es.png | NaN | https://cdn.sofifa.net/flags/me.png | ST |
| 4592 | 263383 | https://sofifa.com/player/263383/stjepan-lonca... | S. Lončar | Stjepan Lončar | CDM, CAM | 71 | 79 | 3900000.0 | 550.0 | 24 | 1996-11-10 | 187 | 74 | 1874.0 | Ferencvárosi TC | Hungarian Nemzeti Bajnokság I | 1.0 | SUB | 44.0 | NaN | 2021-07-16 | 2026.0 | 8 | Bosnia and Herzegovina | NaN | NaN | NaN | Left | 3 | 2 | 1 | High/High | Normal (185+) | No | 9400000.0 | NaN | Solid Player, Playmaker (AI) | 61.0 | 62.0 | 67.0 | 71.0 | 65.0 | 73.0 | 57 | 61 | 54 | 70 | 61 | 71 | 60 | ... | 72 | 66 | 72 | 66 | 68 | 71 | 48 | 74 | 64 | 70 | 61 | 13 | 10 | 10 | 6 | 9 | NaN | 66+2 | 66+2 | 66+2 | 67 | 68 | 68 | 68 | 67 | 70+2 | 70+2 | 70+2 | 69+2 | 72+2 | 72+2 | 72+2 | 69+2 | 67+2 | 70+2 | 70+2 | 70+2 | 67+2 | 66+2 | 66+2 | 66+2 | 66+2 | 66+2 | 16+2 | https://cdn.sofifa.net/players/263/383/22_120.png | https://cdn.sofifa.net/teams/1874/60.png | https://cdn.sofifa.net/flags/hu.png | NaN | https://cdn.sofifa.net/flags/ba.png | CDM |
4593 rows × 111 columns
Se definen variables de desempeño generales para todos los jugadores. En el caso de los jugadores de campo, las columnas que describen el desempeño de arqueros ('goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes') tendrán valores muy bajos (aunque no exactamente 0).
# Per-skill rating columns, grouped by the keyword prefix used in the data set.
_attacking_cols = [
    'attacking_crossing',
    'attacking_finishing',
    'attacking_heading_accuracy',
    'attacking_short_passing',
    'attacking_volleys',
]
_skill_cols = [
    'skill_dribbling',
    'skill_curve',
    'skill_fk_accuracy',
    'skill_long_passing',
    'skill_ball_control',
]
_movement_cols = [
    'movement_acceleration',
    'movement_sprint_speed',
    'movement_agility',
    'movement_reactions',
    'movement_balance',
]
_power_cols = [
    'power_shot_power',
    'power_jumping',
    'power_stamina',
    'power_strength',
    'power_long_shots',
]
_mentality_cols = [
    'mentality_aggression',
    'mentality_interceptions',
    'mentality_positioning',
    'mentality_vision',
    'mentality_penalties',
    'mentality_composure',
]
_defending_cols = [
    'defending_marking_awareness',
    'defending_standing_tackle',
    'defending_sliding_tackle',
]
_goalkeeping_cols = [
    'goalkeeping_diving',
    'goalkeeping_handling',
    'goalkeeping_kicking',
    'goalkeeping_positioning',
    'goalkeeping_reflexes',
]

# The groups are concatenated in the same order the columns appear in the data set.
skills_ratings = (
    _attacking_cols
    + _skill_cols
    + _movement_cols
    + _power_cols
    + _mentality_cols
    + _defending_cols
    + _goalkeeping_cols
)
n_skill_columns = len(skills_ratings)
print(n_skill_columns, 'variables numéricas de desempeño según habilidad')
34 variables numéricas de desempeño según habilidad
# Synthetic reference player: overall and every skill rating are set to 99
MachineGunDict = {'short_name': 'MachineGun', 'overall': 99}
MachineGunDict.update(dict.fromkeys(skills_ratings, 99))
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with a one-row frame adds the player the same way. Columns that are not in
# the dict are left missing for this row.
df_fifa22 = pd.concat([df_fifa22, pd.DataFrame([MachineGunDict])], ignore_index=True)
# Data frame holding only the numeric per-skill ratings
df_skills = df_fifa22[skills_ratings]
df_skills
| attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | 94 | 91 | 96 | 91 | 80 | 91 | 94 | 95 | 86 | 68 | 72 | 69 | 94 | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 |
| 1 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | 85 | 70 | 88 | 77 | 79 | 77 | 93 | 82 | 90 | 85 | 76 | 86 | 87 | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 |
| 2 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | 84 | 77 | 88 | 85 | 88 | 86 | 94 | 74 | 94 | 95 | 77 | 77 | 93 | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 |
| 3 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | 87 | 81 | 95 | 93 | 89 | 96 | 89 | 84 | 80 | 64 | 81 | 53 | 81 | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 |
| 4 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | 83 | 93 | 91 | 76 | 76 | 79 | 91 | 78 | 91 | 63 | 89 | 74 | 91 | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4589 | 66 | 60 | 62 | 73 | 58 | 73 | 73 | 63 | 70 | 70 | 69 | 66 | 74 | 69 | 65 | 73 | 67 | 81 | 77 | 65 | 83 | 70 | 68 | 70 | 66 | 74 | 61 | 74 | 69 | 11 | 8 | 8 | 15 | 8 |
| 4590 | 70 | 61 | 49 | 71 | 29 | 71 | 74 | 35 | 69 | 72 | 80 | 82 | 67 | 58 | 65 | 45 | 69 | 73 | 70 | 40 | 50 | 64 | 65 | 59 | 40 | 70 | 68 | 70 | 67 | 12 | 12 | 11 | 15 | 9 |
| 4591 | 64 | 74 | 72 | 63 | 58 | 66 | 56 | 49 | 57 | 65 | 79 | 74 | 55 | 70 | 75 | 74 | 68 | 65 | 74 | 63 | 70 | 30 | 73 | 59 | 63 | 59 | 35 | 38 | 34 | 14 | 15 | 14 | 12 | 8 |
| 4592 | 57 | 61 | 54 | 70 | 61 | 71 | 60 | 49 | 74 | 78 | 59 | 62 | 53 | 71 | 55 | 62 | 61 | 78 | 72 | 66 | 72 | 66 | 68 | 71 | 48 | 74 | 64 | 70 | 61 | 13 | 10 | 10 | 6 | 9 |
| 4593 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 |
4594 rows × 34 columns
# Coarse position groups used to compare players across roles
# NOTE(review): 'CF' is listed under midfielders and 'RM'/'LM' under forwards —
# confirm this grouping is intended.
forwards = ['RF', 'ST', 'LW', 'LF', 'RS', 'LS', 'RM', 'LM', 'RW']
midfielders = ['RCM', 'LCM', 'LDM', 'CAM', 'CDM', 'LAM', 'RDM', 'CM', 'RAM', 'CF']
defenders = ['RCB', 'CB', 'LCB', 'LB', 'RB', 'RWB', 'LWB']
goalkeepers = ['GK']


def pos2(position):
    """Return the coarse group of a position code.

    Args:
        position: Position code such as 'ST' or 'CB'. Surrounding whitespace
            is ignored, so tokens obtained by splitting "RW, ST, CF" on ','
            are recognised too. Non-string values (e.g. NaN, None) are accepted.

    Returns:
        'Forward', 'Midfielder', 'Defender' or 'GK'; the string 'nan' when the
        value does not belong to any group.
    """
    if isinstance(position, str):
        position = position.strip()
    # The groups are read at call time and checked in the same priority order
    # as the original if/elif chain.
    groups = (
        ('Forward', forwards),
        ('Midfielder', midfielders),
        ('Defender', defenders),
        ('GK', goalkeepers),
    )
    for label, codes in groups:
        if position in codes:
            return label
    return 'nan'
# Store the broad role (forward / midfielder / defender / goalkeeper) in the
# Position2 column: take the first entry of the comma-separated principal
# position and map it through pos2.
primary_position = df_fifa22["player_positions_principal"].str.split(',').str[0]
df_fifa22['Position2'] = primary_position.apply(pos2)
# Number of players per broad role.
df_fifa22['Position2'].value_counts()
Defender 1485 Forward 1332 Midfielder 1332 GK 444 nan 1 Name: Position2, dtype: int64
Visualizamos los datos (jugadores) según sus habilidades, es decir, en un espacio de 34 dimensiones. ¿Cómo lo hacemos? Empecemos con dos variables numéricas por vez.
Consigna de trabajo: visualizar a los jugadores (datos) usando las variables numéricas de skills_ratings de a pares. ¿Emergen grupos/clusters?
# Display the list of skill column names used in the analysis below.
skills_ratings
['attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions', 'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking_awareness', 'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes']
# Pairwise scatter matrix for the first five skill columns only, to keep the
# grid readable.
sns.pairplot(df_skills.loc[:, skills_ratings[:5]])
<seaborn.axisgrid.PairGrid at 0x7fa95b688760>
# Boolean mask: True for the top players, i.e. overall rating above 85.
bool_crack = df_fifa22["overall"].gt(85)
# Choose the two skills to plot by their position in skills_ratings
# (valid positions run from 0 to n_skills - 1).
skill_1, skill_2 = skills_ratings[6], skills_ratings[10]
Gráfica con matplotlib.pyplot: más liviana, pero también más sencilla (menos completa).
# Static scatter of the two chosen skills; point colour separates top players
# (bool_crack) from the rest.
scatter_axes = plt.figure(figsize=(8, 8), dpi=80).add_subplot(111)
scatter_axes.scatter(df_skills[skill_1], df_skills[skill_2], s=3, c=bool_crack)
scatter_axes.set_xlabel(skill_1)
scatter_axes.set_ylabel(skill_2)
plt.show()
Gráfica con Plotly, más completa e interactiva pero un poco pesada
# Interactive scatter of the two chosen skills.
# Base trace: every player as a marker; the hover text is the club name
# (any other df_fifa22 column can be used instead).
graf1 = go.Scatter(
    x=df_skills[skill_1],
    y=df_skills[skill_2],
    mode='markers',
    text=df_fifa22.loc[:, 'club_name'],
    marker=dict(size=5),
)
# Overlay trace: write the short name of each top player (bool_crack) at its
# position, with no marker.
crack = go.Scatter(
    x=df_skills.loc[bool_crack, skill_1],
    y=df_skills.loc[bool_crack, skill_2],
    name='Top players',
    text=df_fifa22.loc[bool_crack, 'short_name'],
    textfont=dict(family='sans serif', size=10, color='black'),
    opacity=0.9,
    mode='text',
)
data = [graf1, crack]
# The title font is set through title=dict(text=..., font=...): the legacy
# `titlefont` attribute is deprecated and rejected by newer plotly releases.
layout = go.Layout(
    title=dict(
        text="Visualización de la base de a dos variables numéricas",
        font=dict(size=20),
    ),
    xaxis=dict(title=dict(text=skill_1)),
    yaxis=dict(title=dict(text=skill_2)),
    autosize=False,
    width=1000,
    height=650,
)
fig = go.Figure(data=data, layout=layout)
fig.show()
# Choose another pair of skills by their position in skills_ratings
# (valid positions run from 0 to n_skills - 1).
skill_1, skill_2 = skills_ratings[2], skills_ratings[3]
Gráfica con matplotlib.pyplot: más liviana, pero también más sencilla (menos completa).
# Static scatter of the two chosen skills; point colour separates top players
# (bool_crack) from the rest.
scatter_axes = plt.figure(figsize=(8, 8), dpi=80).add_subplot(111)
scatter_axes.scatter(df_skills[skill_1], df_skills[skill_2], s=3, c=bool_crack)
scatter_axes.set_xlabel(skill_1)
scatter_axes.set_ylabel(skill_2)
plt.show()
Gráfica con Plotly, más completa e interactiva pero un poco pesada
# Interactive scatter of the two chosen skills.
# Base trace: every player as a marker; the hover text is the club name
# (any other df_fifa22 column can be used instead).
graf1 = go.Scatter(
    x=df_skills[skill_1],
    y=df_skills[skill_2],
    mode='markers',
    text=df_fifa22.loc[:, 'club_name'],
    marker=dict(size=5),
)
# Overlay trace: write the short name of each top player (bool_crack) at its
# position, with no marker.
crack = go.Scatter(
    x=df_skills.loc[bool_crack, skill_1],
    y=df_skills.loc[bool_crack, skill_2],
    name='Top players',
    text=df_fifa22.loc[bool_crack, 'short_name'],
    textfont=dict(family='sans serif', size=10, color='black'),
    opacity=0.9,
    mode='text',
)
data = [graf1, crack]
# The title font is set through title=dict(text=..., font=...): the legacy
# `titlefont` attribute is deprecated and rejected by newer plotly releases.
layout = go.Layout(
    title=dict(
        text="Visualización de la base de a dos variables numéricas",
        font=dict(size=20),
    ),
    xaxis=dict(title=dict(text=skill_1)),
    yaxis=dict(title=dict(text=skill_2)),
    autosize=False,
    width=1000,
    height=650,
)
fig = go.Figure(data=data, layout=layout)
fig.show()
En la próxima gráfica, similar a la anterior, también se diferencian los mejores jugadores y alguna característica/variable de interés (que se puede cambiar).
También se puede ubicar a un jugador en particular. Con la siguiente línea de código, por ejemplo, ubicamos a Leo Messi:
# Player to highlight; compare against any other short_name to pick someone else.
recherche_joueur = df_fifa22["short_name"].eq('L. Messi')
# Top players (overall above 85) and everyone else (overall below 86).
bool_crack = df_fifa22["overall"].gt(85)
bool_no_crack = df_fifa22["overall"].lt(86)
# Players of one chosen club.
bool_elecc = df_fifa22["club_name"].eq('FC Barcelona')
# The synthetic "MachineGun" row.
bool_machinegun = df_fifa22["short_name"].eq('MachineGun')
# Choose the two skills to plot by their position in skills_ratings
# (valid positions run from 0 to n_skills - 1).
skill_1, skill_2 = skills_ratings[6], skills_ratings[25]
Gráfica con plotly
# Colours shared by the traces below, picked by position in the list.
palette = ['navy', 'red', '#A2D5F2', 'orange', 'green', 'pink']

# Top players: markers plus their short names.
n_crack = go.Scatter(
    x=df_skills.loc[bool_crack, skill_1],
    y=df_skills.loc[bool_crack, skill_2],
    name='Crack',
    text=df_fifa22.loc[bool_crack, 'short_name'],
    textfont=dict(family='sans serif', size=15, color='black'),
    opacity=0.9,
    marker=dict(color=palette[2], size=7),
    mode='markers+text',
)
# Everyone else: small markers only (the name shows on hover).
n_no_crack = go.Scatter(
    x=df_skills.loc[bool_no_crack, skill_1],
    y=df_skills.loc[bool_no_crack, skill_2],
    name='Average player',
    text=df_fifa22.loc[bool_no_crack, 'short_name'],
    opacity=0.6,
    marker=dict(color=palette[1], size=3),
    mode='markers',
)
# Players selected by bool_elecc (the chosen club).
n_elecc = go.Scatter(
    x=df_skills.loc[bool_elecc, skill_1],
    y=df_skills.loc[bool_elecc, skill_2],
    name='Elección',
    text=df_fifa22.loc[bool_elecc, 'short_name'],
    opacity=0.6,
    marker=dict(color=palette[0], size=5),
    mode='markers',
)
# Synthetic "MachineGun" row. The trace is drawn in 'markers+text' mode with a
# textfont, so it needs `text` for the label to render (it was missing).
n_machinegun = go.Scatter(
    x=df_skills.loc[bool_machinegun, skill_1],
    y=df_skills.loc[bool_machinegun, skill_2],
    name='Perfect player',
    text=df_fifa22.loc[bool_machinegun, 'short_name'],
    textfont=dict(family='sans serif', size=20, color='black'),
    opacity=0.6,
    marker=dict(color=palette[3], size=30),
    mode='markers+text',
)
# The single searched player, drawn largest.
joueur_recherche = go.Scatter(
    x=df_skills.loc[recherche_joueur, skill_1],
    y=df_skills.loc[recherche_joueur, skill_2],
    name='Searched player',
    text=df_fifa22.loc[recherche_joueur, 'short_name'],
    textfont=dict(family='sans serif', size=20, color='black'),
    opacity=1,
    marker=dict(color=palette[4], size=40),
    mode='markers+text',
)
# Order matters: later traces are drawn on top of earlier ones.
data = [n_no_crack, n_elecc, n_crack, n_machinegun, joueur_recherche]
# The title font is set through title=dict(text=..., font=...): the legacy
# `titlefont` attribute is deprecated and rejected by newer plotly releases.
layout = go.Layout(
    title=dict(text="Fifa Players", font=dict(size=20)),
    xaxis=dict(title=dict(text=skill_1)),
    yaxis=dict(title=dict(text=skill_2)),
    autosize=False,
    width=1000,
    height=650,
)
fig = go.Figure(data=data, layout=layout)
# Render inline (works in Colab).
fig.show()
Identifiquemos algunos jugadores por nombre
# Show the short names of the rows labelled 0 through 43 (label slice, inclusive).
print(df_fifa22["short_name"].loc[0:43])
0 L. Messi 1 R. Lewandowski 2 Cristiano Ronaldo 3 Neymar Jr 4 K. De Bruyne 5 J. Oblak 6 K. Mbappé 7 M. Neuer 8 M. ter Stegen 9 H. Kane 10 N. Kanté 11 K. Benzema 12 T. Courtois 13 H. Son 14 Casemiro 15 V. van Dijk 16 S. Mané 17 M. Salah 18 Ederson 19 J. Kimmich 20 Alisson 21 G. Donnarumma 22 Sergio Ramos 23 L. Suárez 24 T. Kroos 25 R. Lukaku 26 K. Navas 27 R. Sterling 28 Bruno Fernandes 29 E. Haaland 30 S. Agüero 31 H. Lloris 32 L. Modrić 33 Á. Di María 34 W. Szczęsny 35 T. Müller 36 C. Immobile 37 P. Pogba 38 M. Verratti 39 Marquinhos 40 L. Goretzka 41 P. Dybala 42 A. Robertson 43 F. de Jong Name: short_name, dtype: object
El objetivo es determinar la cantidad óptima de clusters mediante el método del codo, es decir, analizando la inercia de K-Means en función del número de clusters.
# Elbow analysis for the n_clusters hyper-parameter: fit K-Means for 2 to 11
# clusters and record the inertia of each fit.
# One range drives both the fits and the x axis so the two cannot drift apart.
k_values = range(2, 12)
# random_state and n_init are pinned so the curve is reproducible across runs
# and scikit-learn versions.
scores = [
    KMeans(n_clusters=k, n_init=10, random_state=0).fit(df_skills).inertia_
    for k in k_values
]
plt.figure(figsize=(12, 9))
plt.plot(list(k_values), scores, marker='o', linestyle='--')
plt.xlabel('Number of clusters')
plt.ylabel("Inertia")
plt.title("Inertia of k-Means versus number of clusters")
Text(0.5, 1.0, 'Inertia of k-Means versus number of clusters')
# Number of clusters chosen with the elbow method.
clusters = 4
# Build the K-Means estimator. random_state and n_init are pinned so the
# cluster ids assigned to the players stay the same between runs; the cells
# below interpret specific cluster ids.
km = KMeans(n_clusters=clusters, n_init=10, random_state=0)
# Train the model on the skill columns held in df_skills.
km.fit(df_skills)
KMeans(n_clusters=4)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=4)
# Cluster id assigned by the fitted model to each row of df_skills.
clusters = km.labels_
# Labels start at 0, so the largest label plus one is the number of clusters.
n_found = max(clusters) + 1
print('Kmeans encontró: ', n_found, 'clusters')
Kmeans encontró: 4 clusters
# Work on a copy of df_fifa22 that carries an extra 'KMeans' column holding the
# cluster id produced by the model trained on df_skills.
df_KMeans = df_fifa22.assign(KMeans=clusters)
df_KMeans
| sofifa_id | player_url | short_name | long_name | player_positions | overall | potential | value_eur | wage_eur | age | dob | height_cm | weight_kg | club_team_id | club_name | league_name | league_level | club_position | club_jersey_number | club_loaned_from | club_joined | club_contract_valid_until | nationality_id | nationality_name | nation_team_id | nation_position | nation_jersey_number | preferred_foot | weak_foot | skill_moves | international_reputation | work_rate | body_type | real_face | release_clause_eur | player_tags | player_traits | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | ... | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | goalkeeping_speed | ls | st | rs | lw | lf | cf | rf | rw | lam | cam | ram | lm | lcm | cm | rcm | rm | lwb | ldm | cdm | rdm | rwb | lb | lcb | cb | rcb | rb | gk | player_face_url | club_logo_url | club_flag_url | nation_logo_url | nation_flag_url | player_positions_principal | Position2 | KMeans | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023.0 | https://sofifa.com/player/158023/lionel-messi/... | L. Messi | Lionel Andrés Messi Cuccittini | RW, ST, CF | 93 | 93.0 | 78000000.0 | 320000.0 | 34.0 | 1987-06-24 | 170.0 | 72.0 | 73.0 | Paris Saint-Germain | French Ligue 1 | 1.0 | RW | 30.0 | NaN | 2021-08-10 | 2023.0 | 52.0 | Argentina | 1369.0 | RW | 10.0 | Left | 4.0 | 4.0 | 5.0 | Medium/Low | Unique | Yes | 144300000.0 | #Dribbler, #Distance Shooter, #FK Specialist, ... | Finesse Shot, Long Shot Taker (AI), Playmaker ... | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | ... | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | NaN | 89+3 | 89+3 | 89+3 | 92 | 93 | 93 | 93 | 92 | 93 | 93 | 93 | 91+2 | 87+3 | 87+3 | 87+3 | 91+2 | 66+3 | 64+3 | 64+3 | 64+3 | 66+3 | 61+3 | 50+3 | 50+3 | 50+3 | 61+3 | 19+3 | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/flags/ar.png | RW | Forward | 2 |
| 1 | 188545.0 | https://sofifa.com/player/188545/robert-lewand... | R. Lewandowski | Robert Lewandowski | ST | 92 | 92.0 | 119500000.0 | 270000.0 | 32.0 | 1988-08-21 | 185.0 | 81.0 | 21.0 | FC Bayern München | German 1. Bundesliga | 1.0 | ST | 9.0 | NaN | 2014-07-01 | 2023.0 | 37.0 | Poland | 1353.0 | RS | 9.0 | Right | 4.0 | 4.0 | 5.0 | High/Medium | Unique | Yes | 197200000.0 | #Aerial Threat, #Distance Shooter, #Clinical F... | Solid Player, Finesse Shot, Outside Foot Shot,... | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | ... | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | NaN | 90+2 | 90+2 | 90+2 | 85 | 88 | 88 | 88 | 85 | 86+3 | 86+3 | 86+3 | 84+3 | 80+3 | 80+3 | 80+3 | 84+3 | 64+3 | 66+3 | 66+3 | 66+3 | 64+3 | 61+3 | 60+3 | 60+3 | 60+3 | 61+3 | 19+3 | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/flags/pl.png | ST | Forward | 2 |
| 2 | 20801.0 | https://sofifa.com/player/20801/c-ronaldo-dos-... | Cristiano Ronaldo | Cristiano Ronaldo dos Santos Aveiro | ST, LW | 91 | 91.0 | 45000000.0 | 270000.0 | 36.0 | 1985-02-05 | 187.0 | 83.0 | 11.0 | Manchester United | English Premier League | 1.0 | ST | 7.0 | NaN | 2021-08-27 | 2023.0 | 38.0 | Portugal | 1354.0 | ST | 7.0 | Right | 4.0 | 5.0 | 5.0 | High/Low | Unique | Yes | 83300000.0 | #Aerial Threat, #Dribbler, #Distance Shooter, ... | Power Free-Kick, Flair, Long Shot Taker (AI), ... | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | ... | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | NaN | 90+1 | 90+1 | 90+1 | 88 | 89 | 89 | 89 | 88 | 86+3 | 86+3 | 86+3 | 86+3 | 78+3 | 78+3 | 78+3 | 86+3 | 63+3 | 59+3 | 59+3 | 59+3 | 63+3 | 60+3 | 53+3 | 53+3 | 53+3 | 60+3 | 20+3 | https://cdn.sofifa.net/players/020/801/22_120.png | https://cdn.sofifa.net/teams/11/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/teams/1354/60.png | https://cdn.sofifa.net/flags/pt.png | ST | Forward | 2 |
| 3 | 190871.0 | https://sofifa.com/player/190871/neymar-da-sil... | Neymar Jr | Neymar da Silva Santos Júnior | LW, CAM | 91 | 91.0 | 129000000.0 | 270000.0 | 29.0 | 1992-02-05 | 175.0 | 68.0 | 73.0 | Paris Saint-Germain | French Ligue 1 | 1.0 | LW | 10.0 | NaN | 2017-08-03 | 2025.0 | 54.0 | Brazil | NaN | NaN | NaN | Right | 5.0 | 5.0 | 5.0 | High/Medium | Unique | Yes | 238700000.0 | #Speedster, #Dribbler, #Playmaker, #FK Special... | Injury Prone, Flair, Speed Dribbler (AI), Play... | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | ... | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | NaN | 83+3 | 83+3 | 83+3 | 90 | 88 | 88 | 88 | 90 | 89+2 | 89+2 | 89+2 | 89+2 | 82+3 | 82+3 | 82+3 | 89+2 | 67+3 | 63+3 | 63+3 | 63+3 | 67+3 | 62+3 | 50+3 | 50+3 | 50+3 | 62+3 | 20+3 | https://cdn.sofifa.net/players/190/871/22_120.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/flags/fr.png | NaN | https://cdn.sofifa.net/flags/br.png | LW | Forward | 2 |
| 4 | 192985.0 | https://sofifa.com/player/192985/kevin-de-bruy... | K. De Bruyne | Kevin De Bruyne | CM, CAM | 91 | 91.0 | 125500000.0 | 350000.0 | 30.0 | 1991-06-28 | 181.0 | 70.0 | 10.0 | Manchester City | English Premier League | 1.0 | RCM | 17.0 | NaN | 2015-08-30 | 2025.0 | 7.0 | Belgium | 1325.0 | RCM | 7.0 | Right | 5.0 | 4.0 | 4.0 | High/High | Unique | Yes | 232200000.0 | #Dribbler, #Playmaker, #Engine, #Distance Shoo... | Injury Prone, Leadership, Early Crosser, Long ... | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | ... | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | NaN | 83+3 | 83+3 | 83+3 | 88 | 87 | 87 | 87 | 88 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 79+3 | 80+3 | 80+3 | 80+3 | 79+3 | 75+3 | 69+3 | 69+3 | 69+3 | 75+3 | 21+3 | https://cdn.sofifa.net/players/192/985/22_120.png | https://cdn.sofifa.net/teams/10/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/teams/1325/60.png | https://cdn.sofifa.net/flags/be.png | CM | Midfielder | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4589 | 262411.0 | https://sofifa.com/player/262411/miguel-crespo... | Miguel Crespo | Miguel Crespo da Silva | CM | 71 | 76.0 | 2800000.0 | 24000.0 | 24.0 | 1996-09-11 | 183.0 | 78.0 | 326.0 | Fenerbahçe SK | Turkish Süper Lig | 1.0 | SUB | 27.0 | NaN | 2021-09-06 | 2024.0 | 38.0 | Portugal | NaN | NaN | NaN | Right | 3.0 | 2.0 | 1.0 | High/High | Normal (170-185) | No | 5500000.0 | NaN | Playmaker (AI) | 67.0 | 64.0 | 70.0 | 72.0 | 68.0 | 79.0 | 66 | 60 | 62 | 73 | 58 | 73 | 73 | ... | 83 | 70 | 68 | 70 | 66 | 74 | 61 | 74 | 69 | 11 | 8 | 8 | 15 | 8 | NaN | 67+2 | 67+2 | 67+2 | 69 | 69 | 69 | 69 | 69 | 70+2 | 70+2 | 70+2 | 70+2 | 71+2 | 71+2 | 71+2 | 70+2 | 70+2 | 72+2 | 72+2 | 72+2 | 70+2 | 70+2 | 70+2 | 70+2 | 70+2 | 70+2 | 17+2 | https://cdn.sofifa.net/players/262/411/22_120.png | https://cdn.sofifa.net/teams/326/60.png | https://cdn.sofifa.net/flags/tr.png | NaN | https://cdn.sofifa.net/flags/pt.png | CM | Midfielder | 0 |
| 4590 | 262815.0 | https://sofifa.com/player/262815/giannis-kotsi... | G. Kotsiras | Giannis Kotsiras | RB, RM | 71 | 71.0 | 1700000.0 | 600.0 | 28.0 | 1992-12-16 | 183.0 | 77.0 | 1884.0 | Panathinaikos FC | Greek Super League | 1.0 | RB | 33.0 | NaN | 2021-06-22 | 2024.0 | 22.0 | Greece | NaN | NaN | NaN | Right | 4.0 | 2.0 | 1.0 | High/Medium | Normal (170-185) | No | 3700000.0 | NaN | NaN | 81.0 | 51.0 | 66.0 | 70.0 | 66.0 | 67.0 | 70 | 61 | 49 | 71 | 29 | 71 | 74 | ... | 50 | 64 | 65 | 59 | 40 | 70 | 68 | 70 | 67 | 12 | 12 | 11 | 15 | 9 | NaN | 62+2 | 62+2 | 62+2 | 68 | 65 | 65 | 65 | 68 | 66+2 | 66+2 | 66+2 | 69+2 | 66+2 | 66+2 | 66+2 | 69+2 | 70+1 | 67+2 | 67+2 | 67+2 | 70+1 | 69+2 | 65+2 | 65+2 | 65+2 | 69+2 | 17+2 | https://cdn.sofifa.net/players/262/815/22_120.png | https://cdn.sofifa.net/teams/1884/60.png | https://cdn.sofifa.net/flags/gr.png | NaN | https://cdn.sofifa.net/flags/gr.png | RB | Defender | 0 |
| 4591 | 263230.0 | https://sofifa.com/player/263230/milutin-osmaj... | M. Osmajić | Milutin Osmajić | ST, LM, RM | 71 | 81.0 | 4200000.0 | 15000.0 | 21.0 | 1999-07-25 | 185.0 | 76.0 | 1968.0 | Cádiz CF | Spain Primera Division | 1.0 | SUB | 29.0 | NaN | 2021-07-10 | 2024.0 | 15.0 | Montenegro | NaN | NaN | NaN | Right | 5.0 | 3.0 | 1.0 | Medium/Medium | Stocky (185+) | No | 10100000.0 | NaN | NaN | 76.0 | 70.0 | 60.0 | 65.0 | 39.0 | 71.0 | 64 | 74 | 72 | 63 | 58 | 66 | 56 | ... | 70 | 30 | 73 | 59 | 63 | 59 | 35 | 38 | 34 | 14 | 15 | 14 | 12 | 8 | NaN | 71+2 | 71+2 | 71+2 | 68 | 69 | 69 | 69 | 68 | 66+2 | 66+2 | 66+2 | 67+2 | 61+2 | 61+2 | 61+2 | 67+2 | 54+2 | 52+2 | 52+2 | 52+2 | 54+2 | 53+2 | 51+2 | 51+2 | 51+2 | 53+2 | 19+2 | https://cdn.sofifa.net/players/263/230/22_120.png | https://cdn.sofifa.net/teams/1968/60.png | https://cdn.sofifa.net/flags/es.png | NaN | https://cdn.sofifa.net/flags/me.png | ST | Forward | 2 |
| 4592 | 263383.0 | https://sofifa.com/player/263383/stjepan-lonca... | S. Lončar | Stjepan Lončar | CDM, CAM | 71 | 79.0 | 3900000.0 | 550.0 | 24.0 | 1996-11-10 | 187.0 | 74.0 | 1874.0 | Ferencvárosi TC | Hungarian Nemzeti Bajnokság I | 1.0 | SUB | 44.0 | NaN | 2021-07-16 | 2026.0 | 8.0 | Bosnia and Herzegovina | NaN | NaN | NaN | Left | 3.0 | 2.0 | 1.0 | High/High | Normal (185+) | No | 9400000.0 | NaN | Solid Player, Playmaker (AI) | 61.0 | 62.0 | 67.0 | 71.0 | 65.0 | 73.0 | 57 | 61 | 54 | 70 | 61 | 71 | 60 | ... | 72 | 66 | 68 | 71 | 48 | 74 | 64 | 70 | 61 | 13 | 10 | 10 | 6 | 9 | NaN | 66+2 | 66+2 | 66+2 | 67 | 68 | 68 | 68 | 67 | 70+2 | 70+2 | 70+2 | 69+2 | 72+2 | 72+2 | 72+2 | 69+2 | 67+2 | 70+2 | 70+2 | 70+2 | 67+2 | 66+2 | 66+2 | 66+2 | 66+2 | 66+2 | 16+2 | https://cdn.sofifa.net/players/263/383/22_120.png | https://cdn.sofifa.net/teams/1874/60.png | https://cdn.sofifa.net/flags/hu.png | NaN | https://cdn.sofifa.net/flags/ba.png | CDM | Midfielder | 0 |
| 4593 | NaN | NaN | MachineGun | NaN | NaN | 99 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 99 | 99 | 99 | 99 | 99 | 99 | 99 | ... | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | 0 |
4594 rows × 113 columns
# Number of players assigned to each cluster.
df_KMeans.KMeans.value_counts()
0 1726 2 1489 3 935 1 444 Name: KMeans, dtype: int64
# Silhouette coefficient: how tightly each player sits within its own cluster
# compared with the other clusters.
silhouette_score = metrics.silhouette_score(
    X=df_skills,
    labels=clusters,
    metric='euclidean',
)
silhouette_score
0.32424431011057114
# Remove the synthetic MachineGun row from df_KMeans so that it does not
# distort the tables and charts below.
machinegun_rows = df_KMeans.index[df_KMeans['short_name'] == 'MachineGun']
df_KMeans.drop(index=machinegun_rows, inplace=True)
# Cross-tabulate cluster against broad position; normalising by row gives, for
# each cluster, the share of its players in each position.
cross = pd.crosstab(
    index=df_KMeans['KMeans'],
    columns=df_KMeans['Position2'],
    normalize="index",
)
cross
| Position2 | Defender | Forward | GK | Midfielder |
|---|---|---|---|---|
| KMeans | ||||
| 0 | 0.376812 | 0.079420 | 0.0 | 0.543768 |
| 1 | 0.000000 | 0.000000 | 1.0 | 0.000000 |
| 2 | 0.000000 | 0.802552 | 0.0 | 0.197448 |
| 3 | 0.893048 | 0.000000 | 0.0 | 0.106952 |
# Heat map of the cluster-by-position shares computed in `cross`.
plt.figure(figsize=(15, 8))
sns.heatmap(data=cross, annot=True, cmap="OrRd")
<AxesSubplot:xlabel='Position2', ylabel='KMeans'>
# Choose the two skills to plot by their position in skills_ratings.
skill_1, skill_2 = skills_ratings[9], skills_ratings[10]
# Boolean mask for the top players (overall above 85); used to decide which
# names are written on the chart.
bool_crack = df_fifa22["overall"].gt(85)
bool_crack.value_counts()
False 4524 True 70 Name: overall, dtype: int64
# 1) Every player as a marker on the two chosen skills, coloured by the
#    cluster id assigned by K-Means.
kmean_clusters = go.Scatter(
    x=df_skills[skill_1],
    y=df_skills[skill_2],
    mode='markers',
    text=df_KMeans.loc[:, 'short_name'],
    marker=dict(
        size=5,
        # The builtin float replaces np.float, an alias removed in NumPy 1.24.
        color=clusters.astype(float),
        colorscale='Portland',
        showscale=False,
    ),
)
# 2) The top players (bool_crack) on the same two skills. This trace repeats
#    points already drawn above; it exists only to print their names.
crack = go.Scatter(
    x=df_skills.loc[bool_crack, skill_1],
    y=df_skills.loc[bool_crack, skill_2],
    name='Nombres de Ckacks',
    text=df_KMeans.loc[bool_crack, 'short_name'],
    textfont=dict(family='sans serif', size=10, color='black'),
    opacity=0.9,
    mode='text',
)
data = [kmean_clusters, crack]
# The title font is set through title=dict(text=..., font=...): the legacy
# `titlefont` attribute is deprecated and rejected by newer plotly releases.
layout = go.Layout(
    title=dict(text="Clustering K means para Cracks!!!", font=dict(size=20)),
    xaxis=dict(title=dict(text=skill_1)),
    yaxis=dict(title=dict(text=skill_2)),
    autosize=False,
    width=1000,
    height=650,
)
fig = go.Figure(data=data, layout=layout)
fig.show()
# Boolean mask for the regular players (overall of 71 or less).
# There are many of them, so printing every name on the chart could produce a
# blotch that hides the clusters.
bool_no_crack = df_fifa22["overall"].le(71)
bool_no_crack.value_counts()
False 3828 True 766 Name: overall, dtype: int64
# 1) Every player as a marker on the two chosen skills, coloured by the
#    cluster id assigned by K-Means.
kmean_clusters = go.Scatter(
    x=df_skills[skill_1],
    y=df_skills[skill_2],
    mode='markers',
    text=df_KMeans.loc[:, 'short_name'],
    marker=dict(
        size=5,
        # The builtin float replaces np.float, an alias removed in NumPy 1.24.
        color=clusters.astype(float),
        colorscale='Portland',
        showscale=False,
    ),
)
# 2) The regular players (bool_no_crack) on the same two skills. No `text` is
#    passed, so this text-mode trace draws no labels: there are too many names
#    to print. Passing text=df_KMeans.loc[bool_no_crack, 'short_name'] would
#    label them.
no_crack = go.Scatter(
    x=df_skills.loc[bool_no_crack, skill_1],
    y=df_skills.loc[bool_no_crack, skill_2],
    name='Nombres Regulares',
    textfont=dict(family='sans serif', size=10, color='black'),
    opacity=0.9,
    mode='text',
)
data = [kmean_clusters, no_crack]
# The title font is set through title=dict(text=..., font=...): the legacy
# `titlefont` attribute is deprecated and rejected by newer plotly releases.
layout = go.Layout(
    title=dict(text="Clustering K means para Regulares!!!", font=dict(size=20)),
    xaxis=dict(title=dict(text=skill_1)),
    yaxis=dict(title=dict(text=skill_2)),
    autosize=False,
    width=1000,
    height=650,
)
fig = go.Figure(data=data, layout=layout)
fig.show()
# Keep only the two skill columns under study. Selecting before copying avoids
# duplicating the whole wide df_fifa22 frame just to discard most of it.
df_knn = df_fifa22[['movement_acceleration', 'skill_ball_control']].copy()
# Nearest-neighbours search returning 4 neighbours per point.
knn = NearestNeighbors(n_neighbors=4)
vecinos = knn.fit(df_knn)
# Distance matrix: one row per analysed point, one column per neighbour.
# The query points are the fitted points themselves.
distancia, indices = vecinos.kneighbors(df_knn)
# Sort each column independently in ascending order (axis=0), which is the
# form needed for the k-distance curve.
distancia = np.sort(distancia, axis=0)
print('Cantidad de puntos analisados que forman las dos varibales seleccionadas:',len(distancia))
print('\nMatriz de distancia de cada punto con respecto a los vecinos seleccionados\n', distancia)
Cantidad de puntos analisados que forman las dos varibales seleccionadas: 4594 Matriz de distancia de cada punto con respecto a los vecinos seleccionados [[0. 0. 0. 0. ] [0. 0. 0. 0. ] [0. 0. 0. 0. ] ... [0. 4.47213595 5.38516481 5.83095189] [0. 5.09901951 5.38516481 6.08276253] [0. 7.21110255 8.24621125 8.54400375]]
# From the sorted distance matrix, keep a single column to look for the
# best EPSILON.
columna = 3
distancia_vector = distancia[:, columna]
# Positional index (0 .. n-1) with one entry per value of the selected column;
# it is the x axis of the distance curve.
i = np.arange(distancia_vector.shape[0])
print('Datos de la columna de distancias seleccionada de la matriz\n', distancia_vector)
print('\nCantidad de puntos analisados que forman las dos varibales seleccionadas\n', i)
Datos de la columna de distancias seleccionada de la matriz [0. 0. 0. ... 5.83095189 6.08276253 8.54400375] Cantidad de puntos analisados que forman las dos varibales seleccionadas [ 0 1 2 ... 4591 4592 4593]
# Plot the sorted distances against the point index. The EPSILON to use is read
# at the sharpest bend ("elbow") of this curve.
plt.figure(figsize=(12, 9))
ax = sns.lineplot(x=i, y=distancia_vector)
ax.set_xlabel("Punto")
ax.set_ylabel("Distancia")
Text(0, 0.5, 'Distancia')
Podemos determinar que, utilizando cualquier columna de las distancias que se generan para cada punto con respecto, en este caso, a los cuatro vecinos más cercanos, el EPSILON que corresponde al quiebre más pronunciado de la gráfica sería 1.
# Work on a copy of the original data frame.
df_dbscan = df_fifa22.copy()
# DBSCAN configured with the EPSILON chosen from the distance curve.
dbs = DBSCAN(eps=1, min_samples=30)
# Fit the model on only two dimensions (the two skill variables) and store the
# resulting label of every row in a new 'dbscan' column.
dbs.fit(df_dbscan[['movement_acceleration', 'skill_ball_control']])
df_dbscan['dbscan'] = dbs.labels_
df_dbscan
| sofifa_id | player_url | short_name | long_name | player_positions | overall | potential | value_eur | wage_eur | age | dob | height_cm | weight_kg | club_team_id | club_name | league_name | league_level | club_position | club_jersey_number | club_loaned_from | club_joined | club_contract_valid_until | nationality_id | nationality_name | nation_team_id | nation_position | nation_jersey_number | preferred_foot | weak_foot | skill_moves | international_reputation | work_rate | body_type | real_face | release_clause_eur | player_tags | player_traits | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | ... | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | goalkeeping_speed | ls | st | rs | lw | lf | cf | rf | rw | lam | cam | ram | lm | lcm | cm | rcm | rm | lwb | ldm | cdm | rdm | rwb | lb | lcb | cb | rcb | rb | gk | player_face_url | club_logo_url | club_flag_url | nation_logo_url | nation_flag_url | player_positions_principal | Position2 | dbscan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023.0 | https://sofifa.com/player/158023/lionel-messi/... | L. Messi | Lionel Andrés Messi Cuccittini | RW, ST, CF | 93 | 93.0 | 78000000.0 | 320000.0 | 34.0 | 1987-06-24 | 170.0 | 72.0 | 73.0 | Paris Saint-Germain | French Ligue 1 | 1.0 | RW | 30.0 | NaN | 2021-08-10 | 2023.0 | 52.0 | Argentina | 1369.0 | RW | 10.0 | Left | 4.0 | 4.0 | 5.0 | Medium/Low | Unique | Yes | 144300000.0 | #Dribbler, #Distance Shooter, #FK Specialist, ... | Finesse Shot, Long Shot Taker (AI), Playmaker ... | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | ... | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | NaN | 89+3 | 89+3 | 89+3 | 92 | 93 | 93 | 93 | 92 | 93 | 93 | 93 | 91+2 | 87+3 | 87+3 | 87+3 | 91+2 | 66+3 | 64+3 | 64+3 | 64+3 | 66+3 | 61+3 | 50+3 | 50+3 | 50+3 | 61+3 | 19+3 | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/flags/ar.png | RW | Forward | -1 |
| 1 | 188545.0 | https://sofifa.com/player/188545/robert-lewand... | R. Lewandowski | Robert Lewandowski | ST | 92 | 92.0 | 119500000.0 | 270000.0 | 32.0 | 1988-08-21 | 185.0 | 81.0 | 21.0 | FC Bayern München | German 1. Bundesliga | 1.0 | ST | 9.0 | NaN | 2014-07-01 | 2023.0 | 37.0 | Poland | 1353.0 | RS | 9.0 | Right | 4.0 | 4.0 | 5.0 | High/Medium | Unique | Yes | 197200000.0 | #Aerial Threat, #Distance Shooter, #Clinical F... | Solid Player, Finesse Shot, Outside Foot Shot,... | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | ... | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | NaN | 90+2 | 90+2 | 90+2 | 85 | 88 | 88 | 88 | 85 | 86+3 | 86+3 | 86+3 | 84+3 | 80+3 | 80+3 | 80+3 | 84+3 | 64+3 | 66+3 | 66+3 | 66+3 | 64+3 | 61+3 | 60+3 | 60+3 | 60+3 | 61+3 | 19+3 | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/flags/pl.png | ST | Forward | -1 |
| 2 | 20801.0 | https://sofifa.com/player/20801/c-ronaldo-dos-... | Cristiano Ronaldo | Cristiano Ronaldo dos Santos Aveiro | ST, LW | 91 | 91.0 | 45000000.0 | 270000.0 | 36.0 | 1985-02-05 | 187.0 | 83.0 | 11.0 | Manchester United | English Premier League | 1.0 | ST | 7.0 | NaN | 2021-08-27 | 2023.0 | 38.0 | Portugal | 1354.0 | ST | 7.0 | Right | 4.0 | 5.0 | 5.0 | High/Low | Unique | Yes | 83300000.0 | #Aerial Threat, #Dribbler, #Distance Shooter, ... | Power Free-Kick, Flair, Long Shot Taker (AI), ... | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | ... | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | NaN | 90+1 | 90+1 | 90+1 | 88 | 89 | 89 | 89 | 88 | 86+3 | 86+3 | 86+3 | 86+3 | 78+3 | 78+3 | 78+3 | 86+3 | 63+3 | 59+3 | 59+3 | 59+3 | 63+3 | 60+3 | 53+3 | 53+3 | 53+3 | 60+3 | 20+3 | https://cdn.sofifa.net/players/020/801/22_120.png | https://cdn.sofifa.net/teams/11/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/teams/1354/60.png | https://cdn.sofifa.net/flags/pt.png | ST | Forward | -1 |
| 3 | 190871.0 | https://sofifa.com/player/190871/neymar-da-sil... | Neymar Jr | Neymar da Silva Santos Júnior | LW, CAM | 91 | 91.0 | 129000000.0 | 270000.0 | 29.0 | 1992-02-05 | 175.0 | 68.0 | 73.0 | Paris Saint-Germain | French Ligue 1 | 1.0 | LW | 10.0 | NaN | 2017-08-03 | 2025.0 | 54.0 | Brazil | NaN | NaN | NaN | Right | 5.0 | 5.0 | 5.0 | High/Medium | Unique | Yes | 238700000.0 | #Speedster, #Dribbler, #Playmaker, #FK Special... | Injury Prone, Flair, Speed Dribbler (AI), Play... | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | ... | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | NaN | 83+3 | 83+3 | 83+3 | 90 | 88 | 88 | 88 | 90 | 89+2 | 89+2 | 89+2 | 89+2 | 82+3 | 82+3 | 82+3 | 89+2 | 67+3 | 63+3 | 63+3 | 63+3 | 67+3 | 62+3 | 50+3 | 50+3 | 50+3 | 62+3 | 20+3 | https://cdn.sofifa.net/players/190/871/22_120.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/flags/fr.png | NaN | https://cdn.sofifa.net/flags/br.png | LW | Forward | -1 |
| 4 | 192985.0 | https://sofifa.com/player/192985/kevin-de-bruy... | K. De Bruyne | Kevin De Bruyne | CM, CAM | 91 | 91.0 | 125500000.0 | 350000.0 | 30.0 | 1991-06-28 | 181.0 | 70.0 | 10.0 | Manchester City | English Premier League | 1.0 | RCM | 17.0 | NaN | 2015-08-30 | 2025.0 | 7.0 | Belgium | 1325.0 | RCM | 7.0 | Right | 5.0 | 4.0 | 4.0 | High/High | Unique | Yes | 232200000.0 | #Dribbler, #Playmaker, #Engine, #Distance Shoo... | Injury Prone, Leadership, Early Crosser, Long ... | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | ... | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | NaN | 83+3 | 83+3 | 83+3 | 88 | 87 | 87 | 87 | 88 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 89+2 | 79+3 | 80+3 | 80+3 | 80+3 | 79+3 | 75+3 | 69+3 | 69+3 | 69+3 | 75+3 | 21+3 | https://cdn.sofifa.net/players/192/985/22_120.png | https://cdn.sofifa.net/teams/10/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/teams/1325/60.png | https://cdn.sofifa.net/flags/be.png | CM | Midfielder | -1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4589 | 262411.0 | https://sofifa.com/player/262411/miguel-crespo... | Miguel Crespo | Miguel Crespo da Silva | CM | 71 | 76.0 | 2800000.0 | 24000.0 | 24.0 | 1996-09-11 | 183.0 | 78.0 | 326.0 | Fenerbahçe SK | Turkish Süper Lig | 1.0 | SUB | 27.0 | NaN | 2021-09-06 | 2024.0 | 38.0 | Portugal | NaN | NaN | NaN | Right | 3.0 | 2.0 | 1.0 | High/High | Normal (170-185) | No | 5500000.0 | NaN | Playmaker (AI) | 67.0 | 64.0 | 70.0 | 72.0 | 68.0 | 79.0 | 66 | 60 | 62 | 73 | 58 | 73 | 73 | ... | 83 | 70 | 68 | 70 | 66 | 74 | 61 | 74 | 69 | 11 | 8 | 8 | 15 | 8 | NaN | 67+2 | 67+2 | 67+2 | 69 | 69 | 69 | 69 | 69 | 70+2 | 70+2 | 70+2 | 70+2 | 71+2 | 71+2 | 71+2 | 70+2 | 70+2 | 72+2 | 72+2 | 72+2 | 70+2 | 70+2 | 70+2 | 70+2 | 70+2 | 70+2 | 17+2 | https://cdn.sofifa.net/players/262/411/22_120.png | https://cdn.sofifa.net/teams/326/60.png | https://cdn.sofifa.net/flags/tr.png | NaN | https://cdn.sofifa.net/flags/pt.png | CM | Midfielder | 0 |
| 4590 | 262815.0 | https://sofifa.com/player/262815/giannis-kotsi... | G. Kotsiras | Giannis Kotsiras | RB, RM | 71 | 71.0 | 1700000.0 | 600.0 | 28.0 | 1992-12-16 | 183.0 | 77.0 | 1884.0 | Panathinaikos FC | Greek Super League | 1.0 | RB | 33.0 | NaN | 2021-06-22 | 2024.0 | 22.0 | Greece | NaN | NaN | NaN | Right | 4.0 | 2.0 | 1.0 | High/Medium | Normal (170-185) | No | 3700000.0 | NaN | NaN | 81.0 | 51.0 | 66.0 | 70.0 | 66.0 | 67.0 | 70 | 61 | 49 | 71 | 29 | 71 | 74 | ... | 50 | 64 | 65 | 59 | 40 | 70 | 68 | 70 | 67 | 12 | 12 | 11 | 15 | 9 | NaN | 62+2 | 62+2 | 62+2 | 68 | 65 | 65 | 65 | 68 | 66+2 | 66+2 | 66+2 | 69+2 | 66+2 | 66+2 | 66+2 | 69+2 | 70+1 | 67+2 | 67+2 | 67+2 | 70+1 | 69+2 | 65+2 | 65+2 | 65+2 | 69+2 | 17+2 | https://cdn.sofifa.net/players/262/815/22_120.png | https://cdn.sofifa.net/teams/1884/60.png | https://cdn.sofifa.net/flags/gr.png | NaN | https://cdn.sofifa.net/flags/gr.png | RB | Defender | 0 |
| 4591 | 263230.0 | https://sofifa.com/player/263230/milutin-osmaj... | M. Osmajić | Milutin Osmajić | ST, LM, RM | 71 | 81.0 | 4200000.0 | 15000.0 | 21.0 | 1999-07-25 | 185.0 | 76.0 | 1968.0 | Cádiz CF | Spain Primera Division | 1.0 | SUB | 29.0 | NaN | 2021-07-10 | 2024.0 | 15.0 | Montenegro | NaN | NaN | NaN | Right | 5.0 | 3.0 | 1.0 | Medium/Medium | Stocky (185+) | No | 10100000.0 | NaN | NaN | 76.0 | 70.0 | 60.0 | 65.0 | 39.0 | 71.0 | 64 | 74 | 72 | 63 | 58 | 66 | 56 | ... | 70 | 30 | 73 | 59 | 63 | 59 | 35 | 38 | 34 | 14 | 15 | 14 | 12 | 8 | NaN | 71+2 | 71+2 | 71+2 | 68 | 69 | 69 | 69 | 68 | 66+2 | 66+2 | 66+2 | 67+2 | 61+2 | 61+2 | 61+2 | 67+2 | 54+2 | 52+2 | 52+2 | 52+2 | 54+2 | 53+2 | 51+2 | 51+2 | 51+2 | 53+2 | 19+2 | https://cdn.sofifa.net/players/263/230/22_120.png | https://cdn.sofifa.net/teams/1968/60.png | https://cdn.sofifa.net/flags/es.png | NaN | https://cdn.sofifa.net/flags/me.png | ST | Forward | -1 |
| 4592 | 263383.0 | https://sofifa.com/player/263383/stjepan-lonca... | S. Lončar | Stjepan Lončar | CDM, CAM | 71 | 79.0 | 3900000.0 | 550.0 | 24.0 | 1996-11-10 | 187.0 | 74.0 | 1874.0 | Ferencvárosi TC | Hungarian Nemzeti Bajnokság I | 1.0 | SUB | 44.0 | NaN | 2021-07-16 | 2026.0 | 8.0 | Bosnia and Herzegovina | NaN | NaN | NaN | Left | 3.0 | 2.0 | 1.0 | High/High | Normal (185+) | No | 9400000.0 | NaN | Solid Player, Playmaker (AI) | 61.0 | 62.0 | 67.0 | 71.0 | 65.0 | 73.0 | 57 | 61 | 54 | 70 | 61 | 71 | 60 | ... | 72 | 66 | 68 | 71 | 48 | 74 | 64 | 70 | 61 | 13 | 10 | 10 | 6 | 9 | NaN | 66+2 | 66+2 | 66+2 | 67 | 68 | 68 | 68 | 67 | 70+2 | 70+2 | 70+2 | 69+2 | 72+2 | 72+2 | 72+2 | 69+2 | 67+2 | 70+2 | 70+2 | 70+2 | 67+2 | 66+2 | 66+2 | 66+2 | 66+2 | 66+2 | 16+2 | https://cdn.sofifa.net/players/263/383/22_120.png | https://cdn.sofifa.net/teams/1874/60.png | https://cdn.sofifa.net/flags/hu.png | NaN | https://cdn.sofifa.net/flags/ba.png | CDM | Midfielder | -1 |
| 4593 | NaN | NaN | MachineGun | NaN | NaN | 99 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 99 | 99 | 99 | 99 | 99 | 99 | 99 | ... | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | 99 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | -1 |
4594 rows × 113 columns
# Distinct cluster labels produced by DBSCAN, in order of first appearance.
pd.unique(df_dbscan['dbscan'])
array([-1, 0, 1, 2, 5, 6, 3, 4, 7, 8])
# Number of rows assigned to each DBSCAN label.
df_dbscan.loc[:, 'dbscan'].value_counts()
-1 2436 0 2022 1 31 4 30 2 28 6 15 3 12 5 10 7 5 8 5 Name: dbscan, dtype: int64
# Silhouette score of the DBSCAN partition over the two features it was
# fitted on. The labels must be the DBSCAN ones stored in df_dbscan['dbscan'];
# passing `clusters` (the K-means labels) would score a different model.
# Rows labelled -1 are scored as one more group.
silhouette_score = metrics.silhouette_score(
    df_dbscan[['movement_acceleration', 'skill_ball_control']],
    df_dbscan['dbscan'],
    metric='euclidean',
)
silhouette_score
0.09774482565253752
Comentario del resultado de la métrica
# Remove the 'MachineGun' row from df_dbscan so that it does not distort the
# plots and analysis tables.
filas_machinegun = df_dbscan[df_dbscan['short_name'].eq('MachineGun')].index
df_dbscan.drop(index=filas_machinegun, inplace=True)
# Cross table of DBSCAN label against Position2; normalize="index" turns every
# row into proportions that add up to 1.
cross = pd.crosstab(
    index=df_dbscan['dbscan'],
    columns=df_dbscan['Position2'],
    normalize="index",
)
cross
| Position2 | Defender | Forward | GK | Midfielder |
|---|---|---|---|---|
| dbscan | ||||
| -1 | 0.355647 | 0.220945 | 0.182341 | 0.241068 |
| 0 | 0.281405 | 0.379327 | 0.000000 | 0.339268 |
| 1 | 0.580645 | 0.129032 | 0.000000 | 0.290323 |
| 2 | 0.321429 | 0.357143 | 0.000000 | 0.321429 |
| 3 | 0.666667 | 0.083333 | 0.000000 | 0.250000 |
| 4 | 0.066667 | 0.200000 | 0.000000 | 0.733333 |
| 5 | 0.700000 | 0.300000 | 0.000000 | 0.000000 |
| 6 | 0.133333 | 0.133333 | 0.000000 | 0.733333 |
| 7 | 0.000000 | 0.200000 | 0.000000 | 0.800000 |
| 8 | 0.800000 | 0.000000 | 0.000000 | 0.200000 |
# Heat map of the cross table: shows, as proportions, how the rows of each
# DBSCAN label are spread over the four positions.
plt.figure(figsize=(15, 8))
sns.heatmap(data=cross, cmap="OrRd", annot=True)
<AxesSubplot:xlabel='Position2', ylabel='dbscan'>
# Scatter of the two features, with the colour argument `c` taken from the
# labels produced by DBSCAN.
plt.figure(figsize=(12, 9))
plt.scatter(
    x=df_dbscan['movement_acceleration'],
    y=df_dbscan['skill_ball_control'],
    c=df_dbscan['dbscan'],
)
<matplotlib.collections.PathCollection at 0x7fa918d657c0>
# Numeric code for each of the position groups found in Position2.
posicion_numerica = {
    'Defender': 0,
    'Forward': 1,
    'GK': 2,
    'Midfielder': 3,
}
# Position3 is Position2 with every group name replaced by its numeric code;
# values without an entry in the mapping are left as they are.
df_dbscan['Position3'] = df_dbscan['Position2'].replace(posicion_numerica)
df_dbscan['Position3']
0 1
1 1
2 1
3 1
4 3
..
4588 3
4589 3
4590 0
4591 1
4592 3
Name: Position3, Length: 4593, dtype: int64
# Scatter of the two features coloured by the numeric position code:
# 'Defender' : 0, 'Forward' : 1, 'GK' : 2, 'Midfielder' : 3
plt.figure(figsize=(12, 9))
plt.scatter(
    x=df_dbscan['movement_acceleration'],
    y=df_dbscan['skill_ball_control'],
    c=df_dbscan['Position3'],
)
<matplotlib.collections.PathCollection at 0x7fa948331c10>
# Scatter of the two features that paints one position group in a different
# colour from all the others.
# Position codes: 'Defender' : 0, 'Forward' : 1, 'GK' : 2, 'Midfielder' : 3
# Change this code to highlight another group; 2 highlights the goalkeepers.
posicion_resaltada = 2
plt.figure(figsize=(12, 9))
plt.scatter(
    x=df_dbscan['movement_acceleration'],
    y=df_dbscan['skill_ball_control'],
    # Boolean colour value: False for the highlighted group, True for the rest.
    c=df_dbscan['Position3'].ne(posicion_resaltada),
    cmap='Set1',  # alternatives tried: 'inferno', 'viridis'
)
<matplotlib.collections.PathCollection at 0x7fa92bea42e0>
Antes de evaluar y analizar los resultados de los clusters es importante aclarar que se redujo el data frame original a 4600 elementos y se consideraron 34 variables o instancias de análisis. En la determinación de los clusters se utilizaron dos métodos de análisis:
KMEANS: para este método se realizó el análisis de codo y se determinó que la cantidad conveniente de clusters para el análisis era 4. Con esta cantidad se entrenó el modelo con el dataset de los mejores jugadores, considerando las 34 variables de skill. Para su análisis visual se utilizaron dos variables/instancias aleatorias. En este caso, analizando el mapa de calor que surge de los clusters generados, se puede verificar que:
DBSCAN: para realizar el análisis de clusterización con este método tomamos dos variables/instancias, "movement_acceleration" y "skill_ball_control". Para el entrenamiento de este modelo tenemos que fijar el valor de dos parámetros:
El método genera 10 clusters en su proceso (la etiqueta -1 corresponde a los puntos que DBSCAN marca como ruido, no a un cluster propiamente dicho). Con el gráfico de calor podemos visualizar que los Defensores, en porcentaje superior al 50%, se encuentran en los clusters 1, 3, 5 y 8; los Arqueros se encuentran totalmente contenidos en el cluster -1; los Mediocampistas, considerando un porcentaje mayor al 50%, se encuentran en los clusters 4, 6 y 7. En el caso de los Delanteros, están distribuidos proporcionalmente en todos los clusters, pero representan casi el 40% en los clusters 0 y 2. Por lo tanto, se puede estimar que los clusters están representando los 4 diferentes agrupamientos de posiciones.
No fue necesario realizar ninguna normalización porque las 34 variables/instancias que se tomaron en cuenta para el análisis, y que forman parte del skill del jugador, varían entre 0 y 100.
Como empezamos a realizar un análisis detallado de la base, tomamos el total de los registros para realizar la transformación.
# Start the analysis from the numeric columns of the complete data frame.
# select_dtypes builds a new frame, so `df` itself is not modified.
train = df.select_dtypes(include='number')
# Number of missing values in each numeric column.
train.isna().sum()
sofifa_id 0 overall 0 potential 0 value_eur 74 wage_eur 61 age 0 height_cm 0 weight_kg 0 club_team_id 61 league_level 61 club_jersey_number 61 club_contract_valid_until 61 nationality_id 0 nation_team_id 18480 nation_jersey_number 18480 weak_foot 0 skill_moves 0 international_reputation 0 release_clause_eur 1176 pace 2132 shooting 2132 passing 2132 dribbling 2132 defending 2132 physic 2132 attacking_crossing 0 attacking_finishing 0 attacking_heading_accuracy 0 attacking_short_passing 0 attacking_volleys 0 skill_dribbling 0 skill_curve 0 skill_fk_accuracy 0 skill_long_passing 0 skill_ball_control 0 movement_acceleration 0 movement_sprint_speed 0 movement_agility 0 movement_reactions 0 movement_balance 0 power_shot_power 0 power_jumping 0 power_stamina 0 power_strength 0 power_long_shots 0 mentality_aggression 0 mentality_interceptions 0 mentality_positioning 0 mentality_vision 0 mentality_penalties 0 mentality_composure 0 defending_marking_awareness 0 defending_standing_tackle 0 defending_sliding_tackle 0 goalkeeping_diving 0 goalkeeping_handling 0 goalkeeping_kicking 0 goalkeeping_positioning 0 goalkeeping_reflexes 0 goalkeeping_speed 17107 dtype: int64
# PCA fails on missing values, so they have to be removed before applying it.
# First drop whole columns that have very many nulls (here, more than 4000
# records) and that do not affect the analysis; then drop every remaining row
# that still contains a null.
sparse_columns = ['goalkeeping_speed', 'nation_team_id', 'nation_jersey_number']
train = train.drop(columns=sparse_columns).dropna()
train.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 16020 entries, 0 to 19238 Data columns (total 57 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 sofifa_id 16020 non-null int64 1 overall 16020 non-null int64 2 potential 16020 non-null int64 3 value_eur 16020 non-null float64 4 wage_eur 16020 non-null float64 5 age 16020 non-null int64 6 height_cm 16020 non-null int64 7 weight_kg 16020 non-null int64 8 club_team_id 16020 non-null float64 9 league_level 16020 non-null float64 10 club_jersey_number 16020 non-null float64 11 club_contract_valid_until 16020 non-null float64 12 nationality_id 16020 non-null int64 13 weak_foot 16020 non-null int64 14 skill_moves 16020 non-null int64 15 international_reputation 16020 non-null int64 16 release_clause_eur 16020 non-null float64 17 pace 16020 non-null float64 18 shooting 16020 non-null float64 19 passing 16020 non-null float64 20 dribbling 16020 non-null float64 21 defending 16020 non-null float64 22 physic 16020 non-null float64 23 attacking_crossing 16020 non-null int64 24 attacking_finishing 16020 non-null int64 25 attacking_heading_accuracy 16020 non-null int64 26 attacking_short_passing 16020 non-null int64 27 attacking_volleys 16020 non-null int64 28 skill_dribbling 16020 non-null int64 29 skill_curve 16020 non-null int64 30 skill_fk_accuracy 16020 non-null int64 31 skill_long_passing 16020 non-null int64 32 skill_ball_control 16020 non-null int64 33 movement_acceleration 16020 non-null int64 34 movement_sprint_speed 16020 non-null int64 35 movement_agility 16020 non-null int64 36 movement_reactions 16020 non-null int64 37 movement_balance 16020 non-null int64 38 power_shot_power 16020 non-null int64 39 power_jumping 16020 non-null int64 40 power_stamina 16020 non-null int64 41 power_strength 16020 non-null int64 42 power_long_shots 16020 non-null int64 43 mentality_aggression 16020 non-null int64 44 mentality_interceptions 16020 non-null int64 45 mentality_positioning 16020 non-null int64 46 
mentality_vision 16020 non-null int64 47 mentality_penalties 16020 non-null int64 48 mentality_composure 16020 non-null int64 49 defending_marking_awareness 16020 non-null int64 50 defending_standing_tackle 16020 non-null int64 51 defending_sliding_tackle 16020 non-null int64 52 goalkeeping_diving 16020 non-null int64 53 goalkeeping_handling 16020 non-null int64 54 goalkeeping_kicking 16020 non-null int64 55 goalkeeping_positioning 16020 non-null int64 56 goalkeeping_reflexes 16020 non-null int64 dtypes: float64(13), int64(44) memory usage: 7.1 MB
# The scaler returns a plain array of values, so the column names are saved
# first and put back when the standardised data frame is rebuilt.
x_names = train.columns
standard_scaler = StandardScaler()
array_standard = standard_scaler.fit_transform(train)
# Standardised values wrapped in the data frame used from here on.
df_train = pd.DataFrame(data=array_standard, columns=x_names)
df_train
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -2.814182 | 3.972812 | 3.633107 | 9.563413 | 15.552343 | 1.895405 | -1.580231 | -0.311538 | -0.931817 | -0.46692 | 0.534708 | 0.134481 | -0.143249 | 1.537554 | 2.335002 | 10.366199 | 9.172025 | 1.545325 | 2.831952 | 3.335572 | 3.364156 | -1.110820 | 0.014113 | 2.271702 | 2.767429 | 1.172677 | 3.018222 | 2.873776 | 2.953936 | 2.819051 | 3.269875 | 2.887052 | 3.416208 | 2.007595 | 1.059489 | 2.011523 | 3.672473 | 2.324560 | 2.038870 | 0.179684 | 0.409735 | 0.262048 | 2.737124 | -1.107667 | -0.586248 | 2.646885 | 3.101898 | 1.895392 | 3.489689 | -1.784687 | -0.957899 | -1.434952 | -1.444394 | 0.196080 | 1.522753 | 1.204169 | -0.774593 |
| 1 | -1.650197 | 3.826351 | 3.467622 | 14.852362 | 13.050904 | 1.464970 | 0.710318 | 1.033460 | -0.932772 | -0.46692 | -0.674914 | 0.134481 | -0.439952 | 1.537554 | 2.335002 | 10.366199 | 12.670077 | 0.906003 | 2.831952 | 2.148267 | 2.434230 | -0.489676 | 1.747773 | 1.244270 | 2.767429 | 2.900963 | 2.376476 | 2.942524 | 2.030874 | 1.872890 | 2.659722 | 1.131574 | 2.581519 | 0.782792 | 0.970611 | 0.862980 | 3.558399 | 1.250937 | 2.341423 | 1.623331 | 0.763130 | 1.600470 | 2.292601 | 1.565552 | -0.094965 | 2.786988 | 1.994476 | 3.099884 | 2.715090 | -0.918007 | -0.575722 | -1.709306 | 1.536991 | -1.470516 | 0.535068 | -0.791155 | -0.115960 |
| 2 | -8.047271 | 3.679891 | 3.302138 | 5.357744 | 13.050904 | 2.325839 | 1.015724 | 1.332349 | -0.932956 | -0.46692 | -0.790116 | 0.134481 | -0.420172 | 1.537554 | 3.904603 | 10.366199 | 5.138355 | 1.727988 | 2.974198 | 2.247209 | 2.640880 | -1.110820 | 1.033913 | 2.418478 | 2.767429 | 2.900963 | 1.841688 | 2.736280 | 2.282618 | 2.008056 | 2.591927 | 1.716733 | 2.581519 | 1.482679 | 1.770512 | 1.601329 | 3.672473 | 0.590245 | 2.643977 | 2.472534 | 0.851479 | 0.891894 | 2.673621 | 0.265067 | -1.186704 | 2.786988 | 1.598968 | 2.939285 | 3.392864 | -1.553573 | -1.121689 | -1.434952 | -1.113129 | 0.196080 | 1.522753 | 1.204169 | 0.213357 |
| 3 | -1.561492 | 3.679891 | 3.302138 | 16.063085 | 13.050904 | 0.819317 | -0.816714 | -0.909315 | -0.931817 | -0.46692 | -0.617313 | 1.758588 | -0.103688 | 3.073574 | 3.904603 | 10.366199 | 15.414295 | 2.093315 | 2.191846 | 2.840861 | 3.260831 | -0.924477 | -0.189847 | 2.271702 | 2.028437 | 0.567777 | 2.483434 | 2.736280 | 2.870021 | 2.481137 | 2.795311 | 2.051110 | 3.311872 | 2.182567 | 1.859390 | 2.421717 | 3.102103 | 1.416109 | 1.585040 | -0.159997 | 1.204874 | -0.997644 | 1.911580 | 0.265067 | -0.750008 | 2.156525 | 2.706390 | 3.340782 | 3.199214 | -0.918007 | -1.121689 | -1.160598 | -0.450599 | -0.470559 | 1.522753 | 1.536723 | 0.213357 |
| 4 | -1.480873 | 3.679891 | 3.302138 | 15.617029 | 17.053206 | 1.034535 | 0.099505 | -0.610427 | -0.932974 | -0.46692 | -0.214106 | 1.758588 | -1.033358 | 3.073574 | 2.335002 | 7.710155 | 14.984478 | 0.723339 | 2.405215 | 3.533456 | 2.640880 | 0.752612 | 1.339853 | 2.932194 | 1.966855 | -0.123537 | 3.339095 | 2.461287 | 2.282618 | 2.278388 | 2.524132 | 3.054240 | 2.894527 | 0.695306 | 0.703977 | 1.027057 | 3.330251 | 0.920591 | 2.417062 | -0.244917 | 1.911664 | 0.655701 | 2.546614 | 1.204306 | 0.833013 | 2.296628 | 3.022796 | 2.537788 | 2.811915 | 0.988690 | 0.680001 | 0.156299 | 1.536991 | 0.862718 | -1.769531 | -0.126047 | 0.871990 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16015 | 1.149629 | -2.764391 | -3.151756 | -0.368340 | -0.406835 | -0.687205 | -0.053198 | -1.507092 | 1.133927 | -0.46692 | 0.880315 | -1.489627 | 1.894114 | 0.001534 | -0.804200 | -0.257978 | -0.362381 | -0.920632 | -1.222053 | -1.116823 | -1.492126 | -0.613905 | -1.617567 | -0.590430 | -1.112278 | -0.728437 | -1.367042 | -1.113617 | -1.325717 | -1.235924 | -0.526633 | -0.707498 | -1.487591 | -1.054413 | -0.718069 | -0.941874 | -1.004564 | 0.259900 | -0.986665 | -0.329838 | -1.445588 | -1.548760 | -1.327095 | -0.529674 | -0.477074 | -1.205942 | -1.011384 | -0.754489 | -2.222976 | -0.744671 | -0.521126 | -0.118054 | -1.444394 | -0.137239 | -1.769531 | 1.536723 | 0.871990 |
| 16016 | 1.152604 | -2.764391 | -1.993364 | -0.363242 | -0.431849 | -1.332857 | -0.816714 | -0.610427 | -0.924985 | -0.46692 | 0.361905 | -1.489627 | -0.677314 | 0.001534 | -0.804200 | -0.257978 | -0.357157 | -0.829300 | -0.937561 | -0.721054 | -1.698777 | -0.676020 | -1.413607 | -0.003326 | -1.050695 | -0.901265 | -1.260084 | -0.976121 | -1.661376 | 0.115735 | -1.001196 | -0.540310 | -2.217944 | -0.704469 | -0.895825 | -0.203525 | -1.460860 | 0.177313 | -0.759750 | -1.603643 | -0.385403 | -1.470029 | -0.819067 | -0.746422 | -0.640835 | -0.295274 | -0.536775 | -0.593890 | -1.254728 | -0.802449 | -0.466529 | -0.172925 | 0.211931 | 0.529399 | -1.440302 | -0.791155 | -0.115960 |
| 16017 | 1.180062 | -2.764391 | -2.655302 | -0.364516 | -0.431849 | -0.902422 | -0.358605 | -0.311538 | 1.108029 | -0.46692 | 0.592309 | -1.489627 | -0.677314 | 0.001534 | -0.804200 | -0.257978 | -0.358347 | -0.737968 | -1.079807 | -1.215765 | -1.388801 | -0.676020 | -1.311627 | -1.104146 | -1.112278 | -1.160508 | -1.473999 | -0.632380 | -1.157888 | -0.965592 | -0.594427 | -0.623904 | -1.487591 | -0.704469 | -0.718069 | -0.695758 | -1.803082 | -0.648551 | -0.684112 | -0.754439 | -1.003845 | -1.233837 | -1.073081 | -0.601923 | -0.695422 | -0.715583 | -0.774080 | -0.995387 | -2.319801 | -0.744671 | -0.466529 | -0.118054 | -0.781864 | -1.470516 | -1.111074 | -0.126047 | -1.433226 |
| 16018 | 1.182350 | -2.764391 | -1.827880 | -0.363242 | -0.431849 | -1.332857 | -1.122121 | -1.208204 | 1.108029 | -0.46692 | -0.502111 | -1.489627 | -0.677314 | 0.001534 | -0.804200 | -0.257978 | -0.354115 | -0.007315 | -0.439701 | -2.106244 | -1.492126 | -2.290994 | -2.331427 | -1.838026 | -0.065373 | -1.419751 | -2.650534 | -0.838625 | -1.577461 | -1.033175 | -0.797812 | -1.961411 | -1.904936 | 0.082904 | -0.095924 | 0.452786 | -1.574934 | 0.507659 | -0.835389 | -1.348882 | -1.533937 | -2.021144 | -0.628557 | -1.830159 | -2.005509 | -0.575480 | -1.248689 | -0.192393 | -1.254728 | -2.362474 | -2.104429 | -2.148272 | -1.113129 | -0.137239 | -1.111074 | 1.204169 | 1.530623 |
| 16019 | 1.247944 | -2.764391 | -1.827880 | -0.363242 | -0.431849 | -1.332857 | -2.038340 | -1.955425 | 1.143093 | -0.46692 | -0.444510 | 1.758588 | 1.973235 | 0.001534 | -0.804200 | -0.257978 | -0.355570 | -0.007315 | -1.008684 | -1.215765 | -1.492126 | -0.986592 | -1.719547 | -1.104146 | -1.112278 | -0.901265 | -1.367042 | -0.632380 | -1.073973 | -0.898009 | -0.933401 | -0.623904 | -2.635289 | 0.170390 | -0.095924 | -0.121486 | -0.890490 | 1.498696 | -0.684112 | -0.329838 | -0.915496 | -2.021144 | -0.946074 | -0.312926 | -0.477074 | -0.575480 | -0.774080 | -1.316585 | -2.319801 | -1.033564 | -1.176286 | -0.721633 | -0.781864 | 0.862718 | 0.864296 | -0.458601 | 1.201306 |
16020 rows × 57 columns
# Build the PCA model, fit it on the standardised data and keep the projected
# scores (one row per player, one column per component).
n_pca_components = 3
pca = PCA(n_components=n_pca_components)
scores_pca = pca.fit_transform(df_train)
scores_pca
array([[19.98252021, 2.60510249, 0.10553207],
[18.18746104, 6.22645138, 2.63784608],
[17.75648044, 4.24657088, 4.38742106],
...,
[-6.22604303, -2.95838402, -0.12500675],
[-5.84147896, -7.24796756, 0.88689229],
[-5.33240697, -4.52676279, -2.17010607]])
# Scatter of the first two PCA components.
# No `hue` variable is given, so `palette` and `legend` would be ignored by
# seaborn (newer releases warn about it); they are left out and the axes are
# labelled instead.
plt.figure(figsize=(12, 9))
sns.scatterplot(x=scores_pca[:, 0], y=scores_pca[:, 1])
plt.xlabel('Component 1')
plt.ylabel('Component 2');
# Data frame used for visualisation: the non-standardised `train` values side
# by side with the three PCA scores of each row.
# The component columns are named when their frame is built. Writing into
# `df.columns.values` afterwards mutates an Index that pandas treats as
# immutable, which is unsupported and can leave column lookups inconsistent.
pca_components = pd.DataFrame(
    scores_pca,
    columns=['Component 1', 'Component 2', 'Component 3'],
)
# reset_index aligns `train` (whose index has gaps after dropna) with the
# 0..n-1 index of the PCA scores.
df_segm_pca = pd.concat([train.reset_index(drop=True), pca_components], axis=1)
df_segm_pca
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | Component 1 | Component 2 | Component 3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | 93 | 93 | 78000000.0 | 320000.0 | 34 | 170 | 72 | 73.0 | 1.0 | 30.0 | 2023.0 | 52 | 4 | 4 | 5 | 144300000.0 | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | 94 | 91 | 96 | 91 | 80 | 91 | 94 | 95 | 86 | 68 | 72 | 69 | 94 | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | 19.982520 | 2.605102 | 0.105532 |
| 1 | 188545 | 92 | 92 | 119500000.0 | 270000.0 | 32 | 185 | 81 | 21.0 | 1.0 | 9.0 | 2023.0 | 37 | 4 | 4 | 5 | 197200000.0 | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | 85 | 70 | 88 | 77 | 79 | 77 | 93 | 82 | 90 | 85 | 76 | 86 | 87 | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | 18.187461 | 6.226451 | 2.637846 |
| 2 | 20801 | 91 | 91 | 45000000.0 | 270000.0 | 36 | 187 | 83 | 11.0 | 1.0 | 7.0 | 2023.0 | 38 | 4 | 5 | 5 | 83300000.0 | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | 84 | 77 | 88 | 85 | 88 | 86 | 94 | 74 | 94 | 95 | 77 | 77 | 93 | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | 17.756480 | 4.246571 | 4.387421 |
| 3 | 190871 | 91 | 91 | 129000000.0 | 270000.0 | 29 | 175 | 68 | 73.0 | 1.0 | 10.0 | 2025.0 | 54 | 5 | 5 | 5 | 238700000.0 | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | 87 | 81 | 95 | 93 | 89 | 96 | 89 | 84 | 80 | 64 | 81 | 53 | 81 | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | 20.421666 | 2.970799 | -1.634142 |
| 4 | 192985 | 91 | 91 | 125500000.0 | 350000.0 | 30 | 181 | 70 | 10.0 | 1.0 | 17.0 | 2025.0 | 7 | 5 | 4 | 4 | 232200000.0 | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | 83 | 93 | 91 | 76 | 76 | 79 | 91 | 78 | 91 | 63 | 89 | 74 | 91 | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | 19.879639 | 7.711465 | -1.883137 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16015 | 261962 | 47 | 52 | 70000.0 | 1000.0 | 22 | 180 | 64 | 112541.0 | 1.0 | 36.0 | 2021.0 | 155 | 3 | 2 | 1 | 114000.0 | 58.0 | 35.0 | 46.0 | 48.0 | 42.0 | 49.0 | 46 | 32 | 48 | 50 | 30 | 45 | 33 | 38 | 48 | 49 | 56 | 60 | 55 | 53 | 70 | 46 | 62 | 51 | 46 | 30 | 52 | 42 | 38 | 43 | 42 | 37 | 38 | 43 | 48 | 6 | 10 | 5 | 15 | 13 | -6.479586 | -2.902181 | -0.563466 |
| 16016 | 262040 | 47 | 59 | 110000.0 | 500.0 | 19 | 175 | 70 | 445.0 | 1.0 | 27.0 | 2021.0 | 25 | 3 | 2 | 1 | 193000.0 | 59.0 | 39.0 | 50.0 | 46.0 | 41.0 | 51.0 | 54 | 33 | 46 | 51 | 32 | 41 | 53 | 31 | 50 | 42 | 60 | 58 | 64 | 49 | 69 | 49 | 47 | 63 | 47 | 38 | 49 | 39 | 51 | 49 | 44 | 47 | 37 | 44 | 47 | 11 | 12 | 6 | 8 | 10 | -5.156328 | -3.008489 | -0.760424 |
| 16017 | 262760 | 47 | 55 | 100000.0 | 500.0 | 21 | 178 | 72 | 111131.0 | 1.0 | 31.0 | 2021.0 | 25 | 3 | 2 | 1 | 175000.0 | 60.0 | 37.0 | 45.0 | 49.0 | 41.0 | 52.0 | 39 | 32 | 43 | 49 | 37 | 47 | 37 | 37 | 49 | 49 | 60 | 60 | 58 | 46 | 59 | 50 | 57 | 56 | 50 | 34 | 51 | 38 | 45 | 46 | 39 | 36 | 38 | 44 | 48 | 8 | 6 | 7 | 10 | 6 | -6.226043 | -2.958384 | -0.125007 |
| 16018 | 262820 | 47 | 60 | 110000.0 | 500.0 | 19 | 173 | 66 | 111131.0 | 1.0 | 12.0 | 2021.0 | 25 | 3 | 2 | 1 | 239000.0 | 68.0 | 46.0 | 36.0 | 48.0 | 15.0 | 42.0 | 29 | 49 | 40 | 38 | 34 | 42 | 36 | 34 | 33 | 45 | 69 | 67 | 72 | 48 | 73 | 48 | 50 | 50 | 40 | 41 | 34 | 14 | 47 | 40 | 49 | 47 | 10 | 14 | 11 | 7 | 10 | 7 | 14 | 15 | -5.841479 | -7.247968 | 0.886892 |
| 16019 | 264540 | 47 | 60 | 110000.0 | 500.0 | 19 | 167 | 61 | 113040.0 | 1.0 | 13.0 | 2025.0 | 159 | 3 | 2 | 1 | 217000.0 | 68.0 | 38.0 | 45.0 | 48.0 | 36.0 | 48.0 | 39 | 32 | 46 | 50 | 37 | 48 | 38 | 32 | 49 | 38 | 70 | 67 | 65 | 54 | 85 | 50 | 62 | 57 | 40 | 36 | 55 | 42 | 47 | 46 | 35 | 36 | 33 | 31 | 37 | 8 | 13 | 13 | 9 | 14 | -5.332407 | -4.526763 | -2.170106 |
16020 rows × 60 columns
UTILIZAR EL DATA FRAME DF_SEGM_PCA, QUE TIENE ASIGNADAS LAS COLUMNAS RESULTADO DEL PCA
# Elbow analysis for the n_clusters hyperparameter: K-Means is fitted on the
# PCA scores for 2 to 11 clusters and the inertia of each fit is recorded.
def _inertia_for(n_clusters):
    """Return the inertia of a K-Means fit on `scores_pca` with `n_clusters`."""
    model = KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
    return model.fit(scores_pca).inertia_


cluster_counts = np.arange(2, 12)
scores = [_inertia_for(k) for k in cluster_counts.tolist()]

plt.figure(figsize=(12, 9))
plt.plot(cluster_counts, scores, marker='o', linestyle='--')
plt.xlabel('Number of clusters')
plt.ylabel("Inertia")
plt.title("K-Means with PCA")
Text(0.5, 1.0, 'K-Means with PCA')
COMO KMEANS UTILIZA LA TOTALIDAD DE INSTANCIAS QUE FORMAN EL DATA SET DE ANÁLISIS, CONSIDERAMOS PARA LA DEFINICIÓN DE CLUSTERS SOLAMENTE EL ARRAY RESULTADO DEL DATA FRAME AL CUAL SE APLICÓ PCA
# K-Means clustering (4 clusters) on the scores_pca array, i.e. the result of
# applying PCA to the standardised, null-free data frame df_train.
kmeans_pca = KMeans(n_clusters=4, init='k-means++', random_state=42).fit(scores_pca)
kmeans_pca
KMeans(n_clusters=4, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=4, random_state=42)
# Work on a copy of the PCA data frame and add to it the cluster label that
# K-Means assigned to every row.
df_segm_pca_kmeans = df_segm_pca.assign(
    **{'Segment k-means PCA': kmeans_pca.labels_}
)
df_segm_pca_kmeans
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | Component 1 | Component 2 | Component 3 | Segment k-means PCA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | 93 | 93 | 78000000.0 | 320000.0 | 34 | 170 | 72 | 73.0 | 1.0 | 30.0 | 2023.0 | 52 | 4 | 4 | 5 | 144300000.0 | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | 94 | 91 | 96 | 91 | 80 | 91 | 94 | 95 | 86 | 68 | 72 | 69 | 94 | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | 19.982520 | 2.605102 | 0.105532 | 3 |
| 1 | 188545 | 92 | 92 | 119500000.0 | 270000.0 | 32 | 185 | 81 | 21.0 | 1.0 | 9.0 | 2023.0 | 37 | 4 | 4 | 5 | 197200000.0 | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | 85 | 70 | 88 | 77 | 79 | 77 | 93 | 82 | 90 | 85 | 76 | 86 | 87 | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | 18.187461 | 6.226451 | 2.637846 | 3 |
| 2 | 20801 | 91 | 91 | 45000000.0 | 270000.0 | 36 | 187 | 83 | 11.0 | 1.0 | 7.0 | 2023.0 | 38 | 4 | 5 | 5 | 83300000.0 | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | 84 | 77 | 88 | 85 | 88 | 86 | 94 | 74 | 94 | 95 | 77 | 77 | 93 | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | 17.756480 | 4.246571 | 4.387421 | 3 |
| 3 | 190871 | 91 | 91 | 129000000.0 | 270000.0 | 29 | 175 | 68 | 73.0 | 1.0 | 10.0 | 2025.0 | 54 | 5 | 5 | 5 | 238700000.0 | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | 87 | 81 | 95 | 93 | 89 | 96 | 89 | 84 | 80 | 64 | 81 | 53 | 81 | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | 20.421666 | 2.970799 | -1.634142 | 3 |
| 4 | 192985 | 91 | 91 | 125500000.0 | 350000.0 | 30 | 181 | 70 | 10.0 | 1.0 | 17.0 | 2025.0 | 7 | 5 | 4 | 4 | 232200000.0 | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | 83 | 93 | 91 | 76 | 76 | 79 | 91 | 78 | 91 | 63 | 89 | 74 | 91 | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | 19.879639 | 7.711465 | -1.883137 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16015 | 261962 | 47 | 52 | 70000.0 | 1000.0 | 22 | 180 | 64 | 112541.0 | 1.0 | 36.0 | 2021.0 | 155 | 3 | 2 | 1 | 114000.0 | 58.0 | 35.0 | 46.0 | 48.0 | 42.0 | 49.0 | 46 | 32 | 48 | 50 | 30 | 45 | 33 | 38 | 48 | 49 | 56 | 60 | 55 | 53 | 70 | 46 | 62 | 51 | 46 | 30 | 52 | 42 | 38 | 43 | 42 | 37 | 38 | 43 | 48 | 6 | 10 | 5 | 15 | 13 | -6.479586 | -2.902181 | -0.563466 | 1 |
| 16016 | 262040 | 47 | 59 | 110000.0 | 500.0 | 19 | 175 | 70 | 445.0 | 1.0 | 27.0 | 2021.0 | 25 | 3 | 2 | 1 | 193000.0 | 59.0 | 39.0 | 50.0 | 46.0 | 41.0 | 51.0 | 54 | 33 | 46 | 51 | 32 | 41 | 53 | 31 | 50 | 42 | 60 | 58 | 64 | 49 | 69 | 49 | 47 | 63 | 47 | 38 | 49 | 39 | 51 | 49 | 44 | 47 | 37 | 44 | 47 | 11 | 12 | 6 | 8 | 10 | -5.156328 | -3.008489 | -0.760424 | 1 |
| 16017 | 262760 | 47 | 55 | 100000.0 | 500.0 | 21 | 178 | 72 | 111131.0 | 1.0 | 31.0 | 2021.0 | 25 | 3 | 2 | 1 | 175000.0 | 60.0 | 37.0 | 45.0 | 49.0 | 41.0 | 52.0 | 39 | 32 | 43 | 49 | 37 | 47 | 37 | 37 | 49 | 49 | 60 | 60 | 58 | 46 | 59 | 50 | 57 | 56 | 50 | 34 | 51 | 38 | 45 | 46 | 39 | 36 | 38 | 44 | 48 | 8 | 6 | 7 | 10 | 6 | -6.226043 | -2.958384 | -0.125007 | 1 |
| 16018 | 262820 | 47 | 60 | 110000.0 | 500.0 | 19 | 173 | 66 | 111131.0 | 1.0 | 12.0 | 2021.0 | 25 | 3 | 2 | 1 | 239000.0 | 68.0 | 46.0 | 36.0 | 48.0 | 15.0 | 42.0 | 29 | 49 | 40 | 38 | 34 | 42 | 36 | 34 | 33 | 45 | 69 | 67 | 72 | 48 | 73 | 48 | 50 | 50 | 40 | 41 | 34 | 14 | 47 | 40 | 49 | 47 | 10 | 14 | 11 | 7 | 10 | 7 | 14 | 15 | -5.841479 | -7.247968 | 0.886892 | 0 |
| 16019 | 264540 | 47 | 60 | 110000.0 | 500.0 | 19 | 167 | 61 | 113040.0 | 1.0 | 13.0 | 2025.0 | 159 | 3 | 2 | 1 | 217000.0 | 68.0 | 38.0 | 45.0 | 48.0 | 36.0 | 48.0 | 39 | 32 | 46 | 50 | 37 | 48 | 38 | 32 | 49 | 38 | 70 | 67 | 65 | 54 | 85 | 50 | 62 | 57 | 40 | 36 | 55 | 42 | 47 | 46 | 35 | 36 | 33 | 31 | 37 | 8 | 13 | 13 | 9 | 14 | -5.332407 | -4.526763 | -2.170106 | 0 |
16020 rows × 61 columns
# Mean of every numeric variable within each K-Means segment.
segment_groups = df_segm_pca_kmeans.groupby('Segment k-means PCA')
df_segm_pca_kmeans_freq = segment_groups.mean()
df_segm_pca_kmeans_freq
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | Component 1 | Component 2 | Component 3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Segment k-means PCA | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| 0 | 242553.214106 | 62.171201 | 69.578275 | 9.333445e+05 | 2912.002930 | 23.276057 | 178.392424 | 71.900167 | 58251.919841 | 1.407702 | 24.800126 | 2022.770825 | 63.309125 | 3.043951 | 2.571787 | 1.001465 | 1.762719e+06 | 71.859983 | 57.871704 | 54.393470 | 63.216409 | 33.597530 | 56.992047 | 51.362704 | 58.772918 | 50.837798 | 58.884889 | 51.205316 | 62.921515 | 51.464420 | 44.856844 | 50.099414 | 62.558602 | 72.007325 | 71.704897 | 69.704270 | 56.947886 | 69.598368 | 60.926538 | 60.793219 | 61.086229 | 58.521139 | 54.494558 | 46.973629 | 30.181247 | 59.371704 | 55.790707 | 55.589787 | 56.203223 | 32.035789 | 32.487233 | 30.780243 | 10.222687 | 10.240477 | 10.241942 | 10.277941 | 10.225617 | -0.137678 | -3.386877 | 0.796934 |
| 1 | 241780.794582 | 61.388826 | 68.312077 | 7.714207e+05 | 2711.343115 | 24.002822 | 183.850734 | 76.658578 | 61692.563205 | 1.405474 | 21.169582 | 2022.638826 | 64.910553 | 2.705700 | 2.016084 | 1.007619 | 1.452181e+06 | 60.549379 | 33.242664 | 45.927201 | 49.732788 | 59.917043 | 66.040068 | 41.219526 | 29.236456 | 57.713318 | 54.568284 | 29.631490 | 45.211907 | 34.496050 | 31.819131 | 48.062359 | 51.968679 | 59.988713 | 60.983070 | 54.717551 | 56.487302 | 58.211907 | 43.443567 | 67.610327 | 63.061230 | 69.443284 | 30.605248 | 60.757619 | 58.974323 | 36.806998 | 40.166479 | 38.729402 | 51.297404 | 58.950339 | 62.051072 | 60.025959 | 10.269187 | 10.325903 | 10.169865 | 10.205700 | 10.193849 | -5.258480 | 1.500853 | -0.246113 |
| 2 | 225567.879673 | 67.592347 | 71.269409 | 2.245839e+06 | 7832.404335 | 26.396594 | 180.680159 | 74.690555 | 47485.240876 | 1.385092 | 17.992701 | 2022.802699 | 55.947799 | 2.943597 | 2.338642 | 1.058394 | 4.216224e+06 | 66.789648 | 49.736784 | 60.351692 | 63.453218 | 63.938067 | 69.666224 | 57.364300 | 45.571113 | 58.938509 | 66.084052 | 42.213891 | 61.604291 | 51.922805 | 46.193320 | 62.016589 | 64.775935 | 66.622650 | 66.896704 | 66.268525 | 64.099093 | 66.432869 | 59.303915 | 68.659810 | 72.618668 | 69.183367 | 50.549436 | 67.300376 | 64.258571 | 54.412077 | 57.573324 | 48.132935 | 62.288211 | 63.522008 | 65.785667 | 63.496350 | 10.496572 | 10.532847 | 10.470250 | 10.479319 | 10.482637 | 0.252599 | 1.916560 | -0.760903 |
| 3 | 213445.549575 | 74.003777 | 75.983318 | 9.466627e+06 | 27498.268807 | 27.691218 | 178.910922 | 73.636449 | 32184.873151 | 1.126849 | 17.948379 | 2023.193264 | 51.489770 | 3.337425 | 3.223796 | 1.395971 | 1.793779e+07 | 72.632358 | 68.233239 | 69.952786 | 74.011017 | 53.268492 | 68.545168 | 67.664149 | 66.581051 | 59.836953 | 73.102298 | 62.819641 | 73.845452 | 68.848599 | 62.093484 | 67.503935 | 74.741580 | 73.134089 | 72.225370 | 75.057287 | 71.783444 | 72.961284 | 73.248033 | 67.664778 | 74.118980 | 67.220648 | 68.621970 | 64.984577 | 53.234498 | 70.641171 | 70.658798 | 63.861819 | 71.953730 | 52.269751 | 53.264400 | 49.279194 | 10.474662 | 10.592698 | 10.667296 | 10.581681 | 10.533207 | 5.713528 | 0.692070 | 0.158802 |
# Scatter plot of the first two PCA components, coloured by K-Means segment.
plt.figure(figsize=(12, 9))
x_axis = df_segm_pca_kmeans['Component 1']
y_axis = df_segm_pca_kmeans['Component 2']
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (older versions only emitted a FutureWarning).
# NOTE(review): the palette lists four colours, i.e. it presumably targets the
# 4-cluster solution held in df_segm_pca_kmeans at this point - confirm.
sns.scatterplot(
    x=x_axis,
    y=y_axis,
    hue=df_segm_pca_kmeans['Segment k-means PCA'],
    palette=['g', 'r', 'c', 'm'],
)
plt.title('Clusters por componentes PCA')
plt.show()
COMO K-MEANS UTILIZA LA TOTALIDAD DE LAS INSTANCIAS QUE FORMAN EL DATASET DE ANÁLISIS, PARA LA DEFINICIÓN DE LOS CLUSTERS CONSIDERAMOS SOLAMENTE EL ARRAY QUE RESULTA DE APLICAR PCA AL DATA FRAME
# K-Means clustering on scores_pca, the array obtained (per the original note)
# by applying PCA to the standardized, null-free data frame df_train.
# n_init is pinned explicitly: the library default changed from 10 to 'auto'
# in newer scikit-learn releases, which would otherwise alter the resulting
# segmentation (and emit a FutureWarning in the transition versions).
kmeans_pca = KMeans(n_clusters=20, init='k-means++', n_init=10, random_state=42)
kmeans_pca.fit(scores_pca)
KMeans(n_clusters=20, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=20, random_state=42)
# Build the working frame for the K-Means analysis: the PCA results frame plus
# one extra column holding the cluster label K-Means assigned to each row.
# DataFrame.assign returns a new frame, so df_segm_pca itself is not modified.
df_segm_pca_kmeans = df_segm_pca.assign(
    **{'Segment k-means PCA': kmeans_pca.labels_}
)
df_segm_pca_kmeans
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | Component 1 | Component 2 | Component 3 | Segment k-means PCA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | 93 | 93 | 78000000.0 | 320000.0 | 34 | 170 | 72 | 73.0 | 1.0 | 30.0 | 2023.0 | 52 | 4 | 4 | 5 | 144300000.0 | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | 94 | 91 | 96 | 91 | 80 | 91 | 94 | 95 | 86 | 68 | 72 | 69 | 94 | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | 19.982520 | 2.605102 | 0.105532 | 15 |
| 1 | 188545 | 92 | 92 | 119500000.0 | 270000.0 | 32 | 185 | 81 | 21.0 | 1.0 | 9.0 | 2023.0 | 37 | 4 | 4 | 5 | 197200000.0 | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | 85 | 70 | 88 | 77 | 79 | 77 | 93 | 82 | 90 | 85 | 76 | 86 | 87 | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | 18.187461 | 6.226451 | 2.637846 | 15 |
| 2 | 20801 | 91 | 91 | 45000000.0 | 270000.0 | 36 | 187 | 83 | 11.0 | 1.0 | 7.0 | 2023.0 | 38 | 4 | 5 | 5 | 83300000.0 | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | 84 | 77 | 88 | 85 | 88 | 86 | 94 | 74 | 94 | 95 | 77 | 77 | 93 | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | 17.756480 | 4.246571 | 4.387421 | 15 |
| 3 | 190871 | 91 | 91 | 129000000.0 | 270000.0 | 29 | 175 | 68 | 73.0 | 1.0 | 10.0 | 2025.0 | 54 | 5 | 5 | 5 | 238700000.0 | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | 87 | 81 | 95 | 93 | 89 | 96 | 89 | 84 | 80 | 64 | 81 | 53 | 81 | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | 20.421666 | 2.970799 | -1.634142 | 15 |
| 4 | 192985 | 91 | 91 | 125500000.0 | 350000.0 | 30 | 181 | 70 | 10.0 | 1.0 | 17.0 | 2025.0 | 7 | 5 | 4 | 4 | 232200000.0 | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | 83 | 93 | 91 | 76 | 76 | 79 | 91 | 78 | 91 | 63 | 89 | 74 | 91 | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | 19.879639 | 7.711465 | -1.883137 | 15 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16015 | 261962 | 47 | 52 | 70000.0 | 1000.0 | 22 | 180 | 64 | 112541.0 | 1.0 | 36.0 | 2021.0 | 155 | 3 | 2 | 1 | 114000.0 | 58.0 | 35.0 | 46.0 | 48.0 | 42.0 | 49.0 | 46 | 32 | 48 | 50 | 30 | 45 | 33 | 38 | 48 | 49 | 56 | 60 | 55 | 53 | 70 | 46 | 62 | 51 | 46 | 30 | 52 | 42 | 38 | 43 | 42 | 37 | 38 | 43 | 48 | 6 | 10 | 5 | 15 | 13 | -6.479586 | -2.902181 | -0.563466 | 5 |
| 16016 | 262040 | 47 | 59 | 110000.0 | 500.0 | 19 | 175 | 70 | 445.0 | 1.0 | 27.0 | 2021.0 | 25 | 3 | 2 | 1 | 193000.0 | 59.0 | 39.0 | 50.0 | 46.0 | 41.0 | 51.0 | 54 | 33 | 46 | 51 | 32 | 41 | 53 | 31 | 50 | 42 | 60 | 58 | 64 | 49 | 69 | 49 | 47 | 63 | 47 | 38 | 49 | 39 | 51 | 49 | 44 | 47 | 37 | 44 | 47 | 11 | 12 | 6 | 8 | 10 | -5.156328 | -3.008489 | -0.760424 | 5 |
| 16017 | 262760 | 47 | 55 | 100000.0 | 500.0 | 21 | 178 | 72 | 111131.0 | 1.0 | 31.0 | 2021.0 | 25 | 3 | 2 | 1 | 175000.0 | 60.0 | 37.0 | 45.0 | 49.0 | 41.0 | 52.0 | 39 | 32 | 43 | 49 | 37 | 47 | 37 | 37 | 49 | 49 | 60 | 60 | 58 | 46 | 59 | 50 | 57 | 56 | 50 | 34 | 51 | 38 | 45 | 46 | 39 | 36 | 38 | 44 | 48 | 8 | 6 | 7 | 10 | 6 | -6.226043 | -2.958384 | -0.125007 | 5 |
| 16018 | 262820 | 47 | 60 | 110000.0 | 500.0 | 19 | 173 | 66 | 111131.0 | 1.0 | 12.0 | 2021.0 | 25 | 3 | 2 | 1 | 239000.0 | 68.0 | 46.0 | 36.0 | 48.0 | 15.0 | 42.0 | 29 | 49 | 40 | 38 | 34 | 42 | 36 | 34 | 33 | 45 | 69 | 67 | 72 | 48 | 73 | 48 | 50 | 50 | 40 | 41 | 34 | 14 | 47 | 40 | 49 | 47 | 10 | 14 | 11 | 7 | 10 | 7 | 14 | 15 | -5.841479 | -7.247968 | 0.886892 | 12 |
| 16019 | 264540 | 47 | 60 | 110000.0 | 500.0 | 19 | 167 | 61 | 113040.0 | 1.0 | 13.0 | 2025.0 | 159 | 3 | 2 | 1 | 217000.0 | 68.0 | 38.0 | 45.0 | 48.0 | 36.0 | 48.0 | 39 | 32 | 46 | 50 | 37 | 48 | 38 | 32 | 49 | 38 | 70 | 67 | 65 | 54 | 85 | 50 | 62 | 57 | 40 | 36 | 55 | 42 | 47 | 46 | 35 | 36 | 33 | 31 | 37 | 8 | 13 | 13 | 9 | 14 | -5.332407 | -4.526763 | -2.170106 | 5 |
16020 rows × 61 columns
# Per-cluster mean of every column, grouped by the K-Means segment label.
df_segm_pca_kmeans_freq = (
    df_segm_pca_kmeans
    .groupby(by=['Segment k-means PCA'])
    .mean()
)
df_segm_pca_kmeans_freq
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | Component 1 | Component 2 | Component 3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Segment k-means PCA | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| 0 | 216112.430252 | 75.369748 | 77.736134 | 1.012773e+07 | 26545.378151 | 26.902521 | 176.310924 | 70.788235 | 30473.821849 | 1.065546 | 18.114286 | 2023.378151 | 55.065546 | 3.435294 | 3.662185 | 1.389916 | 1.941173e+07 | 78.647059 | 71.833613 | 71.947899 | 77.677311 | 43.388235 | 63.710924 | 70.877311 | 71.327731 | 55.398319 | 73.969748 | 67.013445 | 78.060504 | 73.102521 | 66.709244 | 67.932773 | 77.213445 | 79.836975 | 77.727731 | 81.677311 | 72.542857 | 78.351261 | 74.648739 | 65.263866 | 72.011765 | 61.346218 | 71.934454 | 58.766387 | 42.668908 | 73.458824 | 73.257143 | 67.126050 | 73.196639 | 42.299160 | 42.549580 | 38.144538 | 10.504202 | 10.517647 | 10.678992 | 10.423529 | 10.326050 | 7.076284 | -1.217613 | -0.120966 |
| 1 | 248232.298356 | 61.295223 | 69.577134 | 7.258692e+05 | 1794.635865 | 22.413469 | 177.083790 | 70.483947 | 63017.663273 | 1.417384 | 23.498825 | 2022.635865 | 63.750196 | 2.852780 | 2.139389 | 1.000000 | 1.396861e+06 | 69.870791 | 43.400940 | 54.838684 | 60.070478 | 55.958496 | 60.198121 | 54.189507 | 40.655442 | 49.650744 | 60.283477 | 36.675020 | 58.685983 | 45.622553 | 40.530149 | 54.848081 | 59.685983 | 70.243540 | 69.566171 | 67.288958 | 56.906030 | 70.335944 | 49.926390 | 62.676586 | 65.913078 | 58.180893 | 42.525450 | 57.337510 | 55.970243 | 51.805012 | 51.715740 | 43.483947 | 52.819890 | 54.779170 | 58.814409 | 56.791699 | 10.152702 | 10.198904 | 10.094753 | 10.083007 | 10.025059 | -2.005618 | -0.670664 | -1.832826 |
| 2 | 207708.524709 | 71.515988 | 72.998547 | 4.173924e+06 | 16282.848837 | 29.213663 | 187.606105 | 81.710756 | 35743.393895 | 1.251453 | 14.776163 | 2022.840116 | 49.550872 | 2.886628 | 2.037791 | 1.215116 | 7.913772e+06 | 54.136628 | 43.824128 | 56.588663 | 57.500000 | 71.529070 | 76.367733 | 47.007267 | 37.697674 | 71.393895 | 66.488372 | 36.867733 | 53.703488 | 43.425872 | 40.619186 | 63.181686 | 62.361919 | 52.470930 | 55.484012 | 52.405523 | 67.799419 | 52.438953 | 59.260174 | 72.117733 | 68.655523 | 81.151163 | 43.688953 | 74.976744 | 71.113372 | 42.529070 | 51.117733 | 45.751453 | 66.649709 | 71.372093 | 72.409884 | 69.831395 | 10.902616 | 10.694767 | 10.720930 | 10.808140 | 10.716570 | -1.342490 | 5.255720 | 1.120873 |
| 3 | 249348.283892 | 59.641148 | 67.872408 | 5.833732e+05 | 1751.036683 | 22.606061 | 184.186603 | 76.754386 | 60781.094099 | 1.483254 | 26.598086 | 2022.657097 | 60.969697 | 2.861244 | 2.215311 | 1.000000 | 1.131982e+06 | 66.003190 | 58.963317 | 45.939394 | 57.851675 | 25.414673 | 60.835726 | 39.161085 | 61.422648 | 59.022329 | 52.570973 | 50.840510 | 57.392344 | 43.398724 | 36.027113 | 38.808612 | 58.481659 | 65.291866 | 66.548644 | 60.682616 | 55.311005 | 60.100478 | 60.325359 | 65.019139 | 58.803828 | 67.891547 | 54.090909 | 44.559809 | 20.740032 | 58.687400 | 49.481659 | 58.291866 | 52.331738 | 22.454545 | 21.735247 | 20.448166 | 10.231260 | 10.051037 | 10.226475 | 10.151515 | 10.043062 | -2.138644 | -3.439418 | 3.279832 |
| 4 | 204156.807229 | 73.100402 | 74.044177 | 5.184839e+06 | 19286.345382 | 29.524096 | 183.584337 | 79.064257 | 41733.028112 | 1.126506 | 17.943775 | 2022.941767 | 52.526104 | 3.391566 | 3.118474 | 1.353414 | 9.660584e+06 | 68.564257 | 73.038153 | 65.771084 | 71.787149 | 39.180723 | 71.650602 | 61.769076 | 73.411647 | 69.325301 | 69.963855 | 69.662651 | 71.923695 | 66.190763 | 59.016064 | 60.427711 | 73.530120 | 67.973896 | 69.026104 | 69.349398 | 71.445783 | 64.959839 | 76.319277 | 71.046185 | 69.212851 | 75.716867 | 69.923695 | 64.578313 | 37.311245 | 74.530120 | 67.839357 | 70.532129 | 71.765060 | 36.411647 | 35.911647 | 30.650602 | 10.562249 | 10.803213 | 10.708835 | 10.815261 | 10.620482 | 5.091056 | -0.006053 | 3.256876 |
| 5 | 255412.606516 | 55.949875 | 66.523810 | 3.352068e+05 | 1154.887218 | 21.109023 | 177.631579 | 70.254386 | 68438.612782 | 1.339599 | 26.335840 | 2022.700501 | 80.894737 | 2.662907 | 2.036341 | 1.000000 | 6.730464e+05 | 67.020050 | 33.953634 | 46.220551 | 52.538847 | 51.857143 | 56.850877 | 47.317043 | 31.355890 | 46.404762 | 51.261905 | 30.005013 | 50.681704 | 37.620301 | 33.738095 | 44.847118 | 50.903509 | 67.441103 | 66.662907 | 62.467419 | 51.844612 | 67.674185 | 38.942356 | 61.899749 | 60.845865 | 55.552632 | 32.204261 | 53.790727 | 51.508772 | 44.229323 | 42.383459 | 37.848371 | 44.307018 | 50.325815 | 54.760652 | 53.593985 | 10.052632 | 10.088972 | 9.884712 | 10.008772 | 9.973684 | -5.114706 | -1.453073 | -1.773854 |
| 6 | 207526.419847 | 77.297710 | 78.763359 | 1.380620e+07 | 41025.667939 | 28.402672 | 181.954198 | 76.797710 | 15113.872137 | 1.036260 | 17.503817 | 2023.379771 | 43.236641 | 3.238550 | 2.952290 | 1.736641 | 2.627233e+07 | 67.488550 | 64.070611 | 72.784351 | 73.734733 | 73.675573 | 75.700382 | 69.330153 | 59.463740 | 66.944656 | 77.362595 | 57.536260 | 73.070611 | 67.677481 | 59.938931 | 74.129771 | 76.383588 | 67.208015 | 67.664122 | 68.938931 | 75.952290 | 68.631679 | 73.893130 | 72.270992 | 78.866412 | 73.958015 | 66.477099 | 76.811069 | 74.851145 | 67.278626 | 71.500000 | 59.513359 | 75.646947 | 73.654580 | 75.368321 | 72.368321 | 10.534351 | 10.431298 | 10.488550 | 10.631679 | 10.511450 | 5.936100 | 4.447591 | -0.571526 |
| 7 | 223207.879917 | 71.497930 | 74.422360 | 3.700016e+06 | 13711.283644 | 26.485507 | 176.767081 | 71.240166 | 32880.392340 | 1.168737 | 17.762940 | 2023.166667 | 54.507246 | 3.125259 | 2.942029 | 1.056936 | 6.937540e+06 | 73.508282 | 60.311594 | 68.225673 | 71.409938 | 64.680124 | 69.163561 | 66.854037 | 56.841615 | 55.719462 | 71.596273 | 52.892340 | 70.590062 | 65.360248 | 57.383023 | 67.755694 | 71.552795 | 74.125259 | 73.008282 | 75.514493 | 68.973085 | 74.962733 | 67.711180 | 67.892340 | 77.810559 | 65.350932 | 62.492754 | 68.026915 | 66.076605 | 64.944099 | 67.394410 | 55.580745 | 68.396480 | 64.258799 | 67.120083 | 64.164596 | 10.336439 | 10.656315 | 10.674948 | 10.569358 | 10.450311 | 3.857839 | 1.303945 | -1.614699 |
| 8 | 235102.669323 | 65.533068 | 70.627092 | 1.282036e+06 | 3775.019920 | 24.738645 | 176.406375 | 70.276494 | 54114.700398 | 1.484462 | 20.322709 | 2022.805578 | 59.025498 | 3.029482 | 2.627888 | 1.001594 | 2.360627e+06 | 72.894821 | 54.649402 | 61.909960 | 66.278088 | 57.194422 | 63.321912 | 59.932271 | 52.671713 | 50.331474 | 65.769721 | 46.841434 | 65.134661 | 56.882072 | 50.546614 | 61.671713 | 65.709163 | 73.508367 | 72.347410 | 73.764143 | 61.726693 | 74.356175 | 60.453386 | 64.614343 | 72.428685 | 59.949004 | 54.924303 | 59.866135 | 57.692430 | 59.600000 | 61.304382 | 50.413546 | 60.733865 | 56.474900 | 59.797610 | 57.009562 | 10.219124 | 10.437450 | 10.066135 | 10.157769 | 10.365737 | 1.186529 | -0.404757 | -1.598712 |
| 9 | 222410.418605 | 66.047965 | 68.563953 | 1.038983e+06 | 4561.264535 | 27.925872 | 189.244186 | 82.815407 | 54249.171512 | 1.539244 | 15.417151 | 2022.517442 | 50.786337 | 2.677326 | 2.004360 | 1.018895 | 1.868052e+06 | 49.559593 | 32.354651 | 45.071221 | 46.550872 | 65.800872 | 73.970930 | 34.334302 | 27.375000 | 66.986919 | 57.059593 | 28.213663 | 40.379360 | 30.947674 | 29.686047 | 50.847384 | 51.966570 | 48.023256 | 50.738372 | 45.069767 | 60.803779 | 46.626453 | 46.860465 | 69.079942 | 63.765988 | 81.768895 | 28.765988 | 68.402616 | 64.385174 | 31.068314 | 37.764535 | 38.463663 | 57.549419 | 65.257267 | 67.161337 | 64.549419 | 10.611919 | 10.638081 | 10.375000 | 10.517442 | 10.510174 | -5.720880 | 4.333934 | 1.666305 |
| 10 | 208708.425947 | 70.225029 | 71.770379 | 2.622744e+06 | 11412.686567 | 28.982778 | 182.845006 | 77.238806 | 37174.468427 | 1.329506 | 17.148106 | 2022.819747 | 49.174512 | 3.045924 | 2.486797 | 1.105626 | 4.885776e+06 | 62.017222 | 56.896670 | 65.078071 | 65.972445 | 67.686567 | 73.915040 | 61.526980 | 51.755454 | 63.934558 | 69.731343 | 49.443169 | 64.462687 | 58.583238 | 53.792193 | 66.988519 | 68.247991 | 61.864524 | 62.078071 | 64.894374 | 67.877153 | 63.935706 | 67.451206 | 70.723307 | 74.894374 | 74.166475 | 59.704937 | 72.690011 | 68.429392 | 59.270953 | 63.335247 | 54.419059 | 67.591274 | 67.522388 | 68.825488 | 66.425947 | 10.730195 | 10.802526 | 10.855339 | 10.675086 | 10.718714 | 2.022878 | 3.222367 | 0.311822 |
| 11 | 230154.957207 | 69.100225 | 72.932432 | 2.311070e+06 | 7944.819820 | 25.222973 | 173.958333 | 68.265766 | 49816.217342 | 1.248874 | 20.131757 | 2023.104730 | 61.976351 | 3.314189 | 3.225225 | 1.006757 | 4.331260e+06 | 79.288288 | 64.020270 | 64.354730 | 71.891892 | 36.568694 | 56.754505 | 64.115991 | 64.215090 | 48.059685 | 66.726351 | 57.957207 | 71.838964 | 64.018018 | 57.975225 | 59.940315 | 70.207207 | 80.331081 | 78.439189 | 81.000000 | 64.087838 | 79.074324 | 66.787162 | 61.418919 | 67.584459 | 54.140766 | 62.854730 | 48.557432 | 34.430180 | 65.716216 | 65.363739 | 59.467342 | 64.379505 | 36.293919 | 35.415541 | 33.129505 | 10.433559 | 10.406532 | 10.420045 | 10.377252 | 10.375000 | 3.650941 | -3.370163 | -0.578367 |
| 12 | 256407.582849 | 54.927326 | 67.540698 | 3.098328e+05 | 1187.863372 | 20.058140 | 175.843023 | 68.470930 | 66072.376453 | 1.412791 | 29.758721 | 2022.742733 | 77.181686 | 2.859012 | 2.226744 | 1.000000 | 6.343735e+05 | 71.069767 | 49.933140 | 47.465116 | 57.302326 | 27.481105 | 47.111919 | 45.507267 | 51.427326 | 41.331395 | 50.950581 | 43.486919 | 56.675872 | 44.405523 | 37.811047 | 43.363372 | 55.550872 | 71.375000 | 70.793605 | 67.399709 | 47.736919 | 71.444767 | 52.898256 | 53.614826 | 51.885174 | 47.888081 | 44.869186 | 37.404070 | 23.390988 | 50.850291 | 49.414244 | 49.622093 | 48.340116 | 25.947674 | 27.199128 | 26.728198 | 9.933140 | 10.069767 | 10.011628 | 10.052326 | 10.072674 | -3.061086 | -5.410057 | -0.001683 |
| 13 | 237801.623596 | 64.518727 | 69.941948 | 1.083797e+06 | 3108.520599 | 24.865169 | 184.892322 | 77.667603 | 59500.345506 | 1.385768 | 18.177903 | 2022.691011 | 59.915730 | 2.788390 | 2.010300 | 1.001873 | 2.026937e+06 | 63.235019 | 35.320225 | 50.204120 | 53.529963 | 63.799625 | 69.882022 | 44.498127 | 30.746255 | 62.036517 | 59.948502 | 31.288390 | 48.757491 | 36.385768 | 33.074906 | 54.173221 | 56.968165 | 62.303371 | 64.001873 | 57.478464 | 59.944757 | 58.837079 | 46.823034 | 71.267790 | 66.588951 | 73.364232 | 33.321161 | 64.835206 | 63.079588 | 37.525281 | 43.501873 | 40.137640 | 55.671348 | 63.102996 | 65.557116 | 63.335206 | 10.279963 | 10.306180 | 10.303371 | 10.361423 | 10.149813 | -3.899826 | 2.441861 | -0.402474 |
| 14 | 254080.751488 | 57.062500 | 67.316964 | 3.893378e+05 | 1270.312500 | 21.534226 | 185.174107 | 76.858631 | 64822.502976 | 1.403274 | 25.028274 | 2022.663690 | 71.446429 | 2.626488 | 2.001488 | 1.000000 | 7.749821e+05 | 59.352679 | 27.136905 | 37.250000 | 40.550595 | 57.229167 | 63.072917 | 31.014881 | 23.144345 | 55.340774 | 46.031250 | 26.456845 | 33.803571 | 28.208333 | 26.833333 | 38.236607 | 42.566964 | 58.861607 | 59.717262 | 49.171131 | 51.190476 | 56.686012 | 37.982143 | 69.226190 | 58.825893 | 67.958333 | 22.940476 | 54.458333 | 55.467262 | 27.904762 | 32.086310 | 35.867560 | 44.924107 | 55.964286 | 59.898810 | 57.799107 | 10.098214 | 10.220238 | 10.040179 | 10.013393 | 10.184524 | -8.051237 | 0.749717 | -0.051299 |
| 15 | 204809.000000 | 83.145228 | 84.738589 | 4.526805e+07 | 107867.219917 | 27.746888 | 178.738589 | 73.448133 | 6621.381743 | 1.004149 | 16.282158 | 2023.738589 | 39.066390 | 3.630705 | 3.763485 | 2.755187 | 8.628382e+07 | 76.647303 | 76.427386 | 79.863071 | 82.954357 | 60.966805 | 71.734440 | 77.572614 | 75.004149 | 63.568465 | 82.804979 | 72.788382 | 83.128631 | 79.423237 | 71.095436 | 77.360996 | 84.402490 | 77.917012 | 75.601660 | 80.543568 | 82.340249 | 79.439834 | 80.273859 | 68.219917 | 80.008299 | 68.452282 | 77.327801 | 70.481328 | 62.045643 | 79.800830 | 81.062241 | 70.008299 | 82.016598 | 60.215768 | 61.414938 | 56.518672 | 10.518672 | 10.543568 | 10.522822 | 10.468880 | 10.800830 | 11.653637 | 3.378673 | -0.967456 |
| 16 | 228236.326722 | 66.295407 | 69.707724 | 1.335094e+06 | 4641.075157 | 26.199374 | 180.987474 | 75.312109 | 55502.781837 | 1.422756 | 20.689979 | 2022.737996 | 59.412317 | 3.217119 | 2.812109 | 1.001044 | 2.447824e+06 | 73.369520 | 64.616910 | 57.907098 | 66.606472 | 32.686848 | 64.535491 | 55.188935 | 65.580376 | 59.235908 | 62.360125 | 58.395616 | 66.580376 | 56.417537 | 49.345511 | 52.437370 | 66.463466 | 73.264092 | 73.402923 | 71.028184 | 62.569937 | 67.339248 | 67.304802 | 66.782881 | 66.361169 | 68.284969 | 61.699374 | 52.172234 | 29.253653 | 65.447808 | 59.398747 | 61.581420 | 61.461378 | 30.930063 | 29.604384 | 27.124217 | 10.424843 | 10.555324 | 10.632568 | 10.458246 | 10.539666 | 1.812122 | -2.445066 | 2.080364 |
| 17 | 228946.685903 | 66.241550 | 69.839242 | 1.325433e+06 | 4375.845012 | 26.195383 | 181.400660 | 75.192086 | 54195.488871 | 1.424567 | 17.861500 | 2022.690025 | 57.940643 | 2.905194 | 2.206925 | 1.000824 | 2.421913e+06 | 67.046167 | 47.754328 | 59.058533 | 62.187964 | 63.331410 | 70.144270 | 56.595218 | 43.374279 | 58.427865 | 64.902721 | 40.065952 | 60.375103 | 49.424567 | 44.271228 | 60.841715 | 63.489695 | 66.890354 | 67.145095 | 65.311624 | 62.662819 | 65.293487 | 57.258862 | 68.644683 | 72.755977 | 70.332234 | 48.768343 | 66.672712 | 63.623248 | 53.154163 | 55.916735 | 46.760099 | 60.171476 | 62.906843 | 65.135202 | 63.046991 | 10.507007 | 10.497939 | 10.553174 | 10.499588 | 10.551525 | -0.460311 | 1.806371 | -0.719833 |
| 18 | 248548.434160 | 62.238550 | 71.106870 | 8.781823e+05 | 2070.658397 | 22.004771 | 175.175573 | 68.613550 | 55878.683206 | 1.371183 | 26.430344 | 2022.857824 | 61.100191 | 3.047710 | 2.724237 | 1.000954 | 1.717470e+06 | 75.098282 | 56.201336 | 56.639313 | 65.171756 | 33.556298 | 51.457061 | 55.959924 | 57.101145 | 44.216603 | 60.166985 | 49.584924 | 65.241412 | 53.565840 | 46.297710 | 52.860687 | 63.695611 | 75.874046 | 74.437977 | 74.107824 | 55.160305 | 74.578244 | 59.437977 | 55.997137 | 58.926527 | 50.567748 | 52.799618 | 43.113550 | 29.376908 | 57.561069 | 57.191794 | 53.465649 | 55.764313 | 32.793893 | 33.733779 | 32.615458 | 10.067748 | 10.065840 | 10.012405 | 10.299618 | 9.998092 | 0.202826 | -4.296691 | -0.545569 |
| 19 | 213270.827133 | 66.938731 | 68.315098 | 1.234912e+06 | 6252.078775 | 29.155361 | 188.218818 | 83.435449 | 52575.336980 | 1.439825 | 20.203501 | 2022.542670 | 52.317287 | 3.111597 | 2.439825 | 1.039387 | 2.112678e+06 | 58.822757 | 67.094092 | 52.363239 | 62.063457 | 32.371991 | 73.070022 | 44.223195 | 68.630197 | 70.621444 | 59.796499 | 62.100656 | 61.827133 | 50.326039 | 43.487965 | 45.264770 | 65.249453 | 57.188184 | 60.124726 | 56.367615 | 64.332604 | 51.757112 | 70.575492 | 68.553611 | 64.072210 | 82.234136 | 61.133479 | 62.376368 | 27.938731 | 68.984683 | 55.452954 | 65.638950 | 63.956236 | 29.496718 | 28.085339 | 24.155361 | 10.719912 | 10.625821 | 10.958425 | 10.851204 | 10.927790 | 0.547014 | -0.314045 | 5.449197 |
# Scatter plot of PCA components 2 and 3, coloured by K-Means segment.
plt.figure(figsize=(12, 9))
x_axis = df_segm_pca_kmeans['Component 2']
y_axis = df_segm_pca_kmeans['Component 3']
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (older versions only emitted a FutureWarning).
sns.scatterplot(
    x=x_axis,
    y=y_axis,
    hue=df_segm_pca_kmeans['Segment k-means PCA'],
)
plt.title('Clusters por componentes PCA')
plt.show()
# Human-readable legend for the K-Means segments.
# Only the first four clusters have an ordinal name; every other label falls
# back to a generic "Cluster N" (1-based). Without the fallback, Series.map
# with a dict turns the unmapped labels into NaN, and those rows would
# silently disappear from the coloured plot now that K-Means uses 20 clusters.
ordinal_cluster_names = {
    0: 'Primer Cluster',
    1: 'Segundo Cluster',
    2: 'Tercer Cluster',
    3: 'Cuarto Cluster',
}
df_segm_pca_kmeans['Legend'] = df_segm_pca_kmeans['Segment k-means PCA'].map(
    lambda label: ordinal_cluster_names.get(label, f'Cluster {label + 1}')
)
plt.figure(figsize=(12, 9))
x_axis = df_segm_pca_kmeans['Component 1']
y_axis = df_segm_pca_kmeans['Component 2']
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
sns.scatterplot(x=x_axis, y=y_axis, hue=df_segm_pca_kmeans['Legend'])
<AxesSubplot:xlabel='Component 1', ylabel='Component 2'>
# Gaussian-mixture test on the PCA results. Fitting can take a while.
#
# The mixtures are fitted on the PCA component columns only. df_segm_pca also
# carries every raw, unscaled player column (including the sofifa_id
# identifier and the euro amounts); fitting on the whole frame lets those
# columns dominate the mixture instead of the PCA scores.
pca_component_columns = ['Component 1', 'Component 2', 'Component 3']
gm_features = df_segm_pca[pca_component_columns]

# Model selection: fit mixtures with 1..8 components and record AIC / BIC.
candidate_components = list(range(1, 9))
bic = []
aic = []
for n_components in candidate_components:
    # random_state makes the 10 random initialisations reproducible, in line
    # with the other models in this notebook.
    gm = GaussianMixture(
        n_components=n_components,
        n_init=10,
        max_iter=100,
        random_state=42,
    )
    gm.fit(gm_features)
    bic.append(gm.bic(gm_features))
    aic.append(gm.aic(gm_features))

fig = plt.figure()
plt.plot(candidate_components, aic, label='AIC')
plt.plot(candidate_components, bic, label='BIC')
plt.legend()
plt.show()

# Final mixture model.
# NOTE(review): n_components=5 is kept from the original analysis - re-check
# it against the AIC/BIC curves above.
gm_pca = GaussianMixture(n_components=5, random_state=42)
gm_pca.fit(gm_features)
predicted_values = gm_pca.predict(gm_features)

# Work on a copy of the PCA results frame and append the mixture cluster
# assigned to each row.
df_segm_pca_gm = df_segm_pca.copy()
df_segm_pca_gm['Segment GM PCA'] = predicted_values
df_segm_pca_gm
| sofifa_id | overall | potential | value_eur | wage_eur | age | height_cm | weight_kg | club_team_id | league_level | club_jersey_number | club_contract_valid_until | nationality_id | weak_foot | skill_moves | international_reputation | release_clause_eur | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | Component 1 | Component 2 | Component 3 | Segment GM PCA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | 93 | 93 | 78000000.0 | 320000.0 | 34 | 170 | 72 | 73.0 | 1.0 | 30.0 | 2023.0 | 52 | 4 | 4 | 5 | 144300000.0 | 85.0 | 92.0 | 91.0 | 95.0 | 34.0 | 65.0 | 85 | 95 | 70 | 91 | 88 | 96 | 93 | 94 | 91 | 96 | 91 | 80 | 91 | 94 | 95 | 86 | 68 | 72 | 69 | 94 | 44 | 40 | 93 | 95 | 75 | 96 | 20 | 35 | 24 | 6 | 11 | 15 | 14 | 8 | 19.982520 | 2.605102 | 0.105532 | 3 |
| 1 | 188545 | 92 | 92 | 119500000.0 | 270000.0 | 32 | 185 | 81 | 21.0 | 1.0 | 9.0 | 2023.0 | 37 | 4 | 4 | 5 | 197200000.0 | 78.0 | 92.0 | 79.0 | 86.0 | 44.0 | 82.0 | 71 | 95 | 90 | 85 | 89 | 85 | 79 | 85 | 70 | 88 | 77 | 79 | 77 | 93 | 82 | 90 | 85 | 76 | 86 | 87 | 81 | 49 | 95 | 81 | 90 | 88 | 35 | 42 | 19 | 15 | 6 | 12 | 8 | 10 | 18.187461 | 6.226451 | 2.637846 | 3 |
| 2 | 20801 | 91 | 91 | 45000000.0 | 270000.0 | 36 | 187 | 83 | 11.0 | 1.0 | 7.0 | 2023.0 | 38 | 4 | 5 | 5 | 83300000.0 | 87.0 | 94.0 | 80.0 | 88.0 | 34.0 | 75.0 | 87 | 95 | 90 | 80 | 86 | 88 | 81 | 84 | 77 | 88 | 85 | 88 | 86 | 94 | 74 | 94 | 95 | 77 | 77 | 93 | 63 | 29 | 95 | 76 | 88 | 95 | 24 | 32 | 24 | 7 | 11 | 15 | 14 | 11 | 17.756480 | 4.246571 | 4.387421 | 1 |
| 3 | 190871 | 91 | 91 | 129000000.0 | 270000.0 | 29 | 175 | 68 | 73.0 | 1.0 | 10.0 | 2025.0 | 54 | 5 | 5 | 5 | 238700000.0 | 91.0 | 83.0 | 86.0 | 94.0 | 37.0 | 63.0 | 85 | 83 | 63 | 86 | 86 | 95 | 88 | 87 | 81 | 95 | 93 | 89 | 96 | 89 | 84 | 80 | 64 | 81 | 53 | 81 | 63 | 37 | 86 | 90 | 93 | 93 | 35 | 32 | 29 | 9 | 9 | 15 | 15 | 11 | 20.421666 | 2.970799 | -1.634142 | 3 |
| 4 | 192985 | 91 | 91 | 125500000.0 | 350000.0 | 30 | 181 | 70 | 10.0 | 1.0 | 17.0 | 2025.0 | 7 | 5 | 4 | 4 | 232200000.0 | 76.0 | 86.0 | 93.0 | 88.0 | 64.0 | 78.0 | 94 | 82 | 55 | 94 | 82 | 88 | 85 | 83 | 93 | 91 | 76 | 76 | 79 | 91 | 78 | 91 | 63 | 89 | 74 | 91 | 76 | 66 | 88 | 94 | 83 | 89 | 68 | 65 | 53 | 15 | 13 | 5 | 10 | 13 | 19.879639 | 7.711465 | -1.883137 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16015 | 261962 | 47 | 52 | 70000.0 | 1000.0 | 22 | 180 | 64 | 112541.0 | 1.0 | 36.0 | 2021.0 | 155 | 3 | 2 | 1 | 114000.0 | 58.0 | 35.0 | 46.0 | 48.0 | 42.0 | 49.0 | 46 | 32 | 48 | 50 | 30 | 45 | 33 | 38 | 48 | 49 | 56 | 60 | 55 | 53 | 70 | 46 | 62 | 51 | 46 | 30 | 52 | 42 | 38 | 43 | 42 | 37 | 38 | 43 | 48 | 6 | 10 | 5 | 15 | 13 | -6.479586 | -2.902181 | -0.563466 | 2 |
| 16016 | 262040 | 47 | 59 | 110000.0 | 500.0 | 19 | 175 | 70 | 445.0 | 1.0 | 27.0 | 2021.0 | 25 | 3 | 2 | 1 | 193000.0 | 59.0 | 39.0 | 50.0 | 46.0 | 41.0 | 51.0 | 54 | 33 | 46 | 51 | 32 | 41 | 53 | 31 | 50 | 42 | 60 | 58 | 64 | 49 | 69 | 49 | 47 | 63 | 47 | 38 | 49 | 39 | 51 | 49 | 44 | 47 | 37 | 44 | 47 | 11 | 12 | 6 | 8 | 10 | -5.156328 | -3.008489 | -0.760424 | 2 |
| 16017 | 262760 | 47 | 55 | 100000.0 | 500.0 | 21 | 178 | 72 | 111131.0 | 1.0 | 31.0 | 2021.0 | 25 | 3 | 2 | 1 | 175000.0 | 60.0 | 37.0 | 45.0 | 49.0 | 41.0 | 52.0 | 39 | 32 | 43 | 49 | 37 | 47 | 37 | 37 | 49 | 49 | 60 | 60 | 58 | 46 | 59 | 50 | 57 | 56 | 50 | 34 | 51 | 38 | 45 | 46 | 39 | 36 | 38 | 44 | 48 | 8 | 6 | 7 | 10 | 6 | -6.226043 | -2.958384 | -0.125007 | 2 |
| 16018 | 262820 | 47 | 60 | 110000.0 | 500.0 | 19 | 173 | 66 | 111131.0 | 1.0 | 12.0 | 2021.0 | 25 | 3 | 2 | 1 | 239000.0 | 68.0 | 46.0 | 36.0 | 48.0 | 15.0 | 42.0 | 29 | 49 | 40 | 38 | 34 | 42 | 36 | 34 | 33 | 45 | 69 | 67 | 72 | 48 | 73 | 48 | 50 | 50 | 40 | 41 | 34 | 14 | 47 | 40 | 49 | 47 | 10 | 14 | 11 | 7 | 10 | 7 | 14 | 15 | -5.841479 | -7.247968 | 0.886892 | 2 |
| 16019 | 264540 | 47 | 60 | 110000.0 | 500.0 | 19 | 167 | 61 | 113040.0 | 1.0 | 13.0 | 2025.0 | 159 | 3 | 2 | 1 | 217000.0 | 68.0 | 38.0 | 45.0 | 48.0 | 36.0 | 48.0 | 39 | 32 | 46 | 50 | 37 | 48 | 38 | 32 | 49 | 38 | 70 | 67 | 65 | 54 | 85 | 50 | 62 | 57 | 40 | 36 | 55 | 42 | 47 | 46 | 35 | 36 | 33 | 31 | 37 | 8 | 13 | 13 | 9 | 14 | -5.332407 | -4.526763 | -2.170106 | 2 |
16020 rows × 61 columns
# Number of rows assigned to each Gaussian-mixture cluster.
df_segm_pca_gm.loc[:, 'Segment GM PCA'].value_counts()
2 10040 0 3041 1 2267 4 631 3 41 Name: Segment GM PCA, dtype: int64
# Scatter plot of the first two PCA components, coloured by Gaussian-mixture
# segment.
plt.figure(figsize=(12, 9))
x_axis = df_segm_pca_gm['Component 1']
y_axis = df_segm_pca_gm['Component 2']
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (older versions only emitted a FutureWarning).
sns.scatterplot(x=x_axis, y=y_axis, hue=df_segm_pca_gm['Segment GM PCA'])
plt.title('Clusters por componentes PCA')
plt.show()